mirror of
https://github.com/dathere/ckan_geoconnex_bulk_runner.git
synced 2026-07-05 15:12:20 +00:00
feat: location schema, architecture, ckanaction update, new test
This commit is contained in:
parent
d01e05ab48
commit
baeb09acb7
7 changed files with 105 additions and 62 deletions
5
Cargo.lock
generated
5
Cargo.lock
generated
|
|
@ -178,14 +178,15 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ckanaction"
|
name = "ckanaction"
|
||||||
version = "0.1.4"
|
version = "0.2.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "13ec17a3808b02c993f8fae3be6dc8a7f153b44b17cdebb2e2a9b0f836cb8e03"
|
checksum = "d2e5332c456e22de38ddf1c8ed0cdfaeba4ab432f5de8cd971b6450ef62f8add"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bon",
|
"bon",
|
||||||
"reqwest 0.12.28",
|
"reqwest 0.12.28",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
|
"thiserror",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.102"
|
anyhow = "1.0.102"
|
||||||
ckanaction = "0.1.4"
|
ckanaction = "0.2.0"
|
||||||
jsonschema = "0.46.4"
|
jsonschema = "0.46.4"
|
||||||
serde_json = "1.0.149"
|
serde_json = "1.0.149"
|
||||||
tokio = { version = "1.52.1", features = ["full"] }
|
tokio = { version = "1.52.1", features = ["full"] }
|
||||||
|
|
|
||||||
3
src/jsonld.rs
Normal file
3
src/jsonld.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
/// Placeholder for building a dataset's JSON-LD document from its metadata.
///
/// Takes ownership of `metadata` and is declared to return a new
/// `serde_json::Value`. NOTE(review): `metadata` is presumably the CKAN
/// `/package_show` response for one dataset — confirm once a caller exists.
///
/// # Panics
///
/// Always panics: the body is still `todo!()`.
pub fn construct_dataset_jsonld_from_metadata(metadata: serde_json::Value) -> serde_json::Value {
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
47
src/lib.rs
47
src/lib.rs
|
|
@ -1,45 +1,2 @@
|
||||||
use serde_json::json;
|
pub mod jsonld;
|
||||||
|
pub mod schema;
|
||||||
pub fn get_dataset_schema() -> serde_json::Value {
|
|
||||||
json!({
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"@context": {"type": ["string", "object"]},
|
|
||||||
"@type": {"type": ["string", "array"], "contains": {"const": "Dataset"}},
|
|
||||||
"@id": {"type": "string"},
|
|
||||||
"name": {"type": "string"},
|
|
||||||
"provider": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
|
||||||
},
|
|
||||||
"about": {
|
|
||||||
"type": ["string", "array"],
|
|
||||||
"items": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
|
|
||||||
},
|
|
||||||
"minItems": 1
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["@context", "@type", "@id", "name", "provider", "about"]
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_location_schema() -> serde_json::Value {
|
|
||||||
json!({
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"@context": {"type": ["string", "object"]},
|
|
||||||
"@type": {"type": ["string", "array"], "contains": {"const": "Place"}},
|
|
||||||
"@id": {"type": "string"},
|
|
||||||
"name": {"type": "string"},
|
|
||||||
"provider": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
|
||||||
},
|
|
||||||
"geo": {"type": "object"},
|
|
||||||
"gsp:hasGeometry": {"type": "object"}
|
|
||||||
},
|
|
||||||
"required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"]
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
|
||||||
62
src/main.rs
62
src/main.rs
|
|
@ -1,23 +1,59 @@
|
||||||
use anyhow::Result;
|
use anyhow::{Result, bail};
|
||||||
|
|
||||||
|
// TODO: Ensure error output is only streamed to stderr as per Geoconnex docs
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
async fn main() -> Result<()> {
|
||||||
let ckan = ckanaction::CKAN::builder()
|
let ckan = ckanaction::CKAN::builder()
|
||||||
.url("http://localhost:5000")
|
.url("http://localhost:5000")
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
// Paginate through /api/3/action/package_list until only an empty array is returned
|
// Paginate through /api/3/action/package_list until only an empty array is returned
|
||||||
let response = ckan.package_list().call().await?;
|
let mut offset = 0;
|
||||||
|
loop {
|
||||||
let result = response
|
// TODO: Verify that only public datasets are returned, otherwise consider /package_search
|
||||||
.as_object()
|
let response = ckan.package_list().offset(offset).limit(100).call().await?;
|
||||||
.unwrap()
|
// Verify successful response from CKAN API
|
||||||
.get("result")
|
let Some(success_opt) = response.get("success") else {
|
||||||
.unwrap()
|
bail!("CKAN API did not return `success` key. Full response: {response}");
|
||||||
.as_array()
|
};
|
||||||
.unwrap();
|
let Some(success) = success_opt.as_bool() else {
|
||||||
|
bail!(
|
||||||
println!("{result:#?}");
|
"Could not parse success key as boolean from CKAN API. Full response: {response}"
|
||||||
|
);
|
||||||
|
};
|
||||||
|
if success {
|
||||||
|
let Some(result) = response.get("result") else {
|
||||||
|
bail!("CKAN API did not return `result` key. Full response: {response}");
|
||||||
|
};
|
||||||
|
// Retrieve dataset names from current pagination
|
||||||
|
let dataset_names = result.as_array().unwrap();
|
||||||
|
if dataset_names.is_empty() {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
// For each dataset in current pagination:
|
||||||
|
for dataset_name in dataset_names {
|
||||||
|
// 0. Get the dataset name as a string
|
||||||
|
let dataset_name_str = dataset_name.as_str().unwrap();
|
||||||
|
println!("{dataset_name_str}");
|
||||||
|
// TODO: Identify if dataset names are unique
|
||||||
|
// 1. Get the dataset's metadata with /package_show by using the dataset name as the id
|
||||||
|
let dataset_metadata = ckan
|
||||||
|
.package_show()
|
||||||
|
.id(dataset_name_str.to_string())
|
||||||
|
.call()
|
||||||
|
.await?;
|
||||||
|
println!("{dataset_metadata:#?}");
|
||||||
|
// 2. Construct JSON-LD based on the data from /package_show
|
||||||
|
// 3. Validate the JSON-LD against the dataset JSON schema
|
||||||
|
// 4. Print the JSON-LD on a new line to stdout
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
bail!("CKAN API returned {{\"success\": false\"}}. Full response: {response}");
|
||||||
|
}
|
||||||
|
offset = offset + 100;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
45
src/schema.rs
Normal file
45
src/schema.rs
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
use serde_json::json;
|
||||||
|
|
||||||
|
pub fn get_dataset_schema() -> serde_json::Value {
|
||||||
|
json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"@context": {"type": ["string", "object"]},
|
||||||
|
"@type": {"type": ["string", "array"], "contains": {"const": "Dataset"}},
|
||||||
|
"@id": {"type": "string"},
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"provider": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||||
|
},
|
||||||
|
"about": {
|
||||||
|
"type": ["string", "array"],
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
|
||||||
|
},
|
||||||
|
"minItems": 1
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"required": ["@context", "@type", "@id", "name", "provider", "about"]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_location_schema() -> serde_json::Value {
|
||||||
|
json!({
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"@context": {"type": ["string", "object"]},
|
||||||
|
"@type": {"type": ["string", "array"], "contains": {"const": "Place"}},
|
||||||
|
"@id": {"type": "string"},
|
||||||
|
"name": {"type": "string"},
|
||||||
|
"provider": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||||
|
},
|
||||||
|
"geo": {"type": "object"},
|
||||||
|
"gsp:hasGeometry": {"type": "object"}
|
||||||
|
},
|
||||||
|
"required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
use anyhow::{Result, bail};
|
use anyhow::{Result, bail};
|
||||||
|
use ckan_geoconnex_bulk_runner::schema::get_location_schema;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
@ -45,7 +46,7 @@ fn validate_usgs_location_jsonld() -> Result<()> {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
let dataset_json_schema = ckan_geoconnex_bulk_runner::get_location_schema();
|
let dataset_json_schema = get_location_schema();
|
||||||
|
|
||||||
if let Err(e) = jsonschema::validate(&dataset_json_schema, &usgs_location_jsonld) {
|
if let Err(e) = jsonschema::validate(&dataset_json_schema, &usgs_location_jsonld) {
|
||||||
println!("Error during validation:");
|
println!("Error during validation:");
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue