refactor: limit, dataset name, also add validation function

This commit is contained in:
rzmk 2026-05-05 19:35:13 -04:00
parent baeb09acb7
commit 3497994681
2 changed files with 24 additions and 8 deletions

View file

@ -1,3 +1,13 @@
use crate::schema::get_dataset_schema;
pub fn construct_dataset_jsonld_from_metadata(metadata: serde_json::Value) -> serde_json::Value { pub fn construct_dataset_jsonld_from_metadata(metadata: serde_json::Value) -> serde_json::Value {
todo!() todo!()
} }
pub fn validate_dataset_jsonld(jsonld: serde_json::Value) -> bool {
if let Ok(_) = jsonschema::validate(&get_dataset_schema(), &jsonld) {
true
} else {
false
}
}

View file

@ -1,4 +1,5 @@
use anyhow::{Result, bail}; use anyhow::{Result, bail};
use ckan_geoconnex_bulk_runner::jsonld::construct_dataset_jsonld_from_metadata;
// TODO: Ensure error output is only streamed to stderr as per Geoconnex docs // TODO: Ensure error output is only streamed to stderr as per Geoconnex docs
@ -10,9 +11,15 @@ async fn main() -> Result<()> {
// Paginate through /api/3/action/package_list until only an empty array is returned // Paginate through /api/3/action/package_list until only an empty array is returned
let mut offset = 0; let mut offset = 0;
let limit = 100;
loop { loop {
// TODO: Verify that only public datasets are returned, otherwise consider /package_search // TODO: Verify that only public datasets are returned
let response = ckan.package_list().offset(offset).limit(100).call().await?; let response = ckan
.package_list()
.offset(offset)
.limit(limit)
.call()
.await?;
// Verify successful response from CKAN API // Verify successful response from CKAN API
let Some(success_opt) = response.get("success") else { let Some(success_opt) = response.get("success") else {
bail!("CKAN API did not return `success` key. Full response: {response}"); bail!("CKAN API did not return `success` key. Full response: {response}");
@ -33,18 +40,17 @@ async fn main() -> Result<()> {
} else { } else {
// For each dataset in current pagination: // For each dataset in current pagination:
for dataset_name in dataset_names { for dataset_name in dataset_names {
// 0. Get the dataset name as a string
let dataset_name_str = dataset_name.as_str().unwrap();
println!("{dataset_name_str}");
// TODO: Identify if dataset names are unique
// 1. Get the dataset's metadata with /package_show by using the dataset name as the id // 1. Get the dataset's metadata with /package_show by using the dataset name as the id
// TODO: Identify if dataset names are unique
let dataset_metadata = ckan let dataset_metadata = ckan
.package_show() .package_show()
.id(dataset_name_str.to_string()) .id(dataset_name.as_str().unwrap().to_string())
.call() .call()
.await?; .await?;
println!("{dataset_metadata:#?}"); println!("{dataset_metadata:#?}");
// 2. Construct JSON-LD based on the data from /package_show // 2. Construct JSON-LD based on the data from /package_show
let jsonld = construct_dataset_jsonld_from_metadata(dataset_metadata);
println!("{jsonld:#?}");
// 3. Validate the JSON-LD against the dataset JSON schema // 3. Validate the JSON-LD against the dataset JSON schema
// 4. Print the JSON-LD on a new line to stdout // 4. Print the JSON-LD on a new line to stdout
} }
@ -52,7 +58,7 @@ async fn main() -> Result<()> {
} else { } else {
bail!("CKAN API returned {{\"success\": false\"}}. Full response: {response}"); bail!("CKAN API returned {{\"success\": false\"}}. Full response: {response}");
} }
offset = offset + 100; offset = offset + limit;
} }
Ok(()) Ok(())