// Path: ckan_geoconnex_bulk_runner/generate_release/src/main.rs
// Language: Rust (105 lines, 4.6 KiB at time of capture)

use anyhow::{Result, bail};
use geoconnex_utils::{jsonld::construct_dataset_jsonld_from_metadata, schema::get_dataset_schema};
use std::collections::HashMap;
#[tokio::main]
async fn main() -> Result<()> {
let namespace = env!("NAMESPACE");
let token = env!("API_TOKEN");
let instance_url = env!("INSTANCE_URL");
let mut headers = HashMap::new();
headers.insert("x-geoconnex-runner".to_string(), token.to_string());
let ckan = ckanaction::CKAN::builder()
.url(instance_url)
.headers(headers)
.build();
// Paginate through /api/3/action/package_list until only an empty array is returned
let mut offset = 0;
let limit = 100;
loop {
// TODO: Verify that only public datasets are returned
let response = ckan
.package_list()
.offset(offset)
.limit(limit)
.call()
.await?;
// Verify successful response from CKAN API
let Some(success_opt) = response.get("success") else {
bail!("CKAN API did not return `success` key. Full response: {response}");
};
let Some(success) = success_opt.as_bool() else {
bail!(
"Could not parse success key as boolean from CKAN API. Full response: {response}"
);
};
if success {
let Some(result) = response.get("result") else {
bail!("CKAN API did not return `result` key. Full response: {response}");
};
// Retrieve dataset names from current pagination
let dataset_names = result.as_array().unwrap();
if dataset_names.is_empty() {
break;
} else {
// For each dataset in current pagination:
for dataset_name in dataset_names {
// 1. Get the dataset's metadata with /package_show by using the dataset name as the id
// TODO: Identify if dataset names are unique
let package_show_response = ckan
.package_show()
.id(dataset_name.as_str().unwrap().to_string())
.call()
.await?;
let Some(success) = package_show_response.get("success") else {
bail!(
"CKAN API did not return success key in /package_show response for dataset {dataset_name}. Full response: {response}"
);
};
if success.as_bool().unwrap() {
let Some(dataset_metadata) = package_show_response.get("result") else {
bail!(
"CKAN API did not return result object in /package_show response for dataset {dataset_name}. Full response: {response}"
);
};
// 2. Construct JSON-LD based on the data from /package_show
let jsonld = match construct_dataset_jsonld_from_metadata(
dataset_metadata.to_owned(),
instance_url.to_string(),
namespace.to_string(),
) {
Ok(j) => j,
Err(e) => {
eprintln!(
"Error while attempting to construct JSON-LD from dataset's metadata: {e}"
);
continue;
}
};
// 3. Validate the JSON-LD against the dataset JSON schema
if jsonschema::validate(&get_dataset_schema(), &jsonld).is_ok() {
// 4. Print the JSON-LD on a new line to stdout
println!("{jsonld}");
} else {
eprintln!("JSON-LD for {dataset_name} is not valid.");
// eprintln!("{jsonld}");
}
} else {
bail!(
"CKAN API returned {{\"success\": false\"}} for /package_show endpoint on dataset {dataset_name}. Full response: {response}"
);
}
}
}
} else {
bail!(
"CKAN API returned {{\"success\": false\"}} for /package_list endpoint. Full response: {response}"
);
}
offset = offset + limit;
}
Ok(())
}