mirror of
https://github.com/dathere/ckan_geoconnex_bulk_runner.git
synced 2026-07-05 15:12:20 +00:00
feat: location schema, architecture, ckanaction update, new test
This commit is contained in:
parent
d01e05ab48
commit
baeb09acb7
7 changed files with 105 additions and 62 deletions
5
Cargo.lock
generated
5
Cargo.lock
generated
|
|
@ -178,14 +178,15 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "ckanaction"
|
||||
version = "0.1.4"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13ec17a3808b02c993f8fae3be6dc8a7f153b44b17cdebb2e2a9b0f836cb8e03"
|
||||
checksum = "d2e5332c456e22de38ddf1c8ed0cdfaeba4ab432f5de8cd971b6450ef62f8add"
|
||||
dependencies = [
|
||||
"bon",
|
||||
"reqwest 0.12.28",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ edition = "2024"
|
|||
|
||||
[dependencies]
|
||||
anyhow = "1.0.102"
|
||||
ckanaction = "0.1.4"
|
||||
ckanaction = "0.2.0"
|
||||
jsonschema = "0.46.4"
|
||||
serde_json = "1.0.149"
|
||||
tokio = { version = "1.52.1", features = ["full"] }
|
||||
|
|
|
|||
3
src/jsonld.rs
Normal file
3
src/jsonld.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
/// Placeholder for turning a dataset's metadata into a JSON-LD document.
///
/// NOTE(review): going by the name, `metadata` is presumably the `/package_show`
/// response for one dataset — confirm against the caller once one exists.
///
/// # Panics
///
/// Always panics: the body is still `todo!()`.
pub fn construct_dataset_jsonld_from_metadata(metadata: serde_json::Value) -> serde_json::Value {
|
||||
todo!()
|
||||
}
|
||||
47
src/lib.rs
47
src/lib.rs
|
|
@ -1,45 +1,2 @@
|
|||
use serde_json::json;
|
||||
|
||||
pub fn get_dataset_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@context": {"type": ["string", "object"]},
|
||||
"@type": {"type": ["string", "array"], "contains": {"const": "Dataset"}},
|
||||
"@id": {"type": "string"},
|
||||
"name": {"type": "string"},
|
||||
"provider": {
|
||||
"type": "object",
|
||||
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||
},
|
||||
"about": {
|
||||
"type": ["string", "array"],
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
|
||||
},
|
||||
"minItems": 1
|
||||
},
|
||||
},
|
||||
"required": ["@context", "@type", "@id", "name", "provider", "about"]
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_location_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@context": {"type": ["string", "object"]},
|
||||
"@type": {"type": ["string", "array"], "contains": {"const": "Place"}},
|
||||
"@id": {"type": "string"},
|
||||
"name": {"type": "string"},
|
||||
"provider": {
|
||||
"type": "object",
|
||||
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||
},
|
||||
"geo": {"type": "object"},
|
||||
"gsp:hasGeometry": {"type": "object"}
|
||||
},
|
||||
"required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"]
|
||||
})
|
||||
}
|
||||
pub mod jsonld;
|
||||
pub mod schema;
|
||||
|
|
|
|||
62
src/main.rs
62
src/main.rs
|
|
@ -1,23 +1,59 @@
|
|||
use anyhow::Result;
|
||||
use anyhow::{Result, bail};
|
||||
|
||||
// TODO: Ensure error output is only streamed to stderr as per Geoconnex docs
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
async fn main() -> Result<()> {
|
||||
let ckan = ckanaction::CKAN::builder()
|
||||
.url("http://localhost:5000")
|
||||
.build();
|
||||
|
||||
// Paginate through /api/3/action/package_list until only an empty array is returned
|
||||
let response = ckan.package_list().call().await?;
|
||||
|
||||
let result = response
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.get("result")
|
||||
.unwrap()
|
||||
.as_array()
|
||||
.unwrap();
|
||||
|
||||
println!("{result:#?}");
|
||||
let mut offset = 0;
|
||||
loop {
|
||||
// TODO: Verify that only public datasets are returned, otherwise consider /package_search
|
||||
let response = ckan.package_list().offset(offset).limit(100).call().await?;
|
||||
// Verify successful response from CKAN API
|
||||
let Some(success_opt) = response.get("success") else {
|
||||
bail!("CKAN API did not return `success` key. Full response: {response}");
|
||||
};
|
||||
let Some(success) = success_opt.as_bool() else {
|
||||
bail!(
|
||||
"Could not parse success key as boolean from CKAN API. Full response: {response}"
|
||||
);
|
||||
};
|
||||
if success {
|
||||
let Some(result) = response.get("result") else {
|
||||
bail!("CKAN API did not return `result` key. Full response: {response}");
|
||||
};
|
||||
// Retrieve dataset names from current pagination
|
||||
let dataset_names = result.as_array().unwrap();
|
||||
if dataset_names.is_empty() {
|
||||
break;
|
||||
} else {
|
||||
// For each dataset in current pagination:
|
||||
for dataset_name in dataset_names {
|
||||
// 0. Get the dataset name as a string
|
||||
let dataset_name_str = dataset_name.as_str().unwrap();
|
||||
println!("{dataset_name_str}");
|
||||
// TODO: Identify if dataset names are unique
|
||||
// 1. Get the dataset's metadata with /package_show by using the dataset name as the id
|
||||
let dataset_metadata = ckan
|
||||
.package_show()
|
||||
.id(dataset_name_str.to_string())
|
||||
.call()
|
||||
.await?;
|
||||
println!("{dataset_metadata:#?}");
|
||||
// 2. Construct JSON-LD based on the data from /package_show
|
||||
// 3. Validate the JSON-LD against the dataset JSON schema
|
||||
// 4. Print the JSON-LD on a new line to stdout
|
||||
}
|
||||
}
|
||||
} else {
|
||||
bail!("CKAN API returned {{\"success\": false\"}}. Full response: {response}");
|
||||
}
|
||||
offset = offset + 100;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
45
src/schema.rs
Normal file
45
src/schema.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
use serde_json::json;
|
||||
|
||||
pub fn get_dataset_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@context": {"type": ["string", "object"]},
|
||||
"@type": {"type": ["string", "array"], "contains": {"const": "Dataset"}},
|
||||
"@id": {"type": "string"},
|
||||
"name": {"type": "string"},
|
||||
"provider": {
|
||||
"type": "object",
|
||||
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||
},
|
||||
"about": {
|
||||
"type": ["string", "array"],
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
|
||||
},
|
||||
"minItems": 1
|
||||
},
|
||||
},
|
||||
"required": ["@context", "@type", "@id", "name", "provider", "about"]
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get_location_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@context": {"type": ["string", "object"]},
|
||||
"@type": {"type": ["string", "array"], "contains": {"const": "Place"}},
|
||||
"@id": {"type": "string"},
|
||||
"name": {"type": "string"},
|
||||
"provider": {
|
||||
"type": "object",
|
||||
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||
},
|
||||
"geo": {"type": "object"},
|
||||
"gsp:hasGeometry": {"type": "object"}
|
||||
},
|
||||
"required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"]
|
||||
})
|
||||
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
use anyhow::{Result, bail};
|
||||
use ckan_geoconnex_bulk_runner::schema::get_location_schema;
|
||||
use serde_json::json;
|
||||
|
||||
#[test]
|
||||
|
|
@ -45,7 +46,7 @@ fn validate_usgs_location_jsonld() -> Result<()> {
|
|||
}
|
||||
});
|
||||
|
||||
let dataset_json_schema = ckan_geoconnex_bulk_runner::get_location_schema();
|
||||
let dataset_json_schema = get_location_schema();
|
||||
|
||||
if let Err(e) = jsonschema::validate(&dataset_json_schema, &usgs_location_jsonld) {
|
||||
println!("Error during validation:");
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue