mirror of
https://github.com/dathere/ckan_geoconnex_bulk_runner.git
synced 2026-07-05 15:12:20 +00:00
refactor: limit, dataset name, also add validation function
This commit is contained in:
parent
baeb09acb7
commit
3497994681
2 changed files with 24 additions and 8 deletions
|
|
@ -1,3 +1,13 @@
|
|||
use crate::schema::get_dataset_schema;
|
||||
|
||||
pub fn construct_dataset_jsonld_from_metadata(metadata: serde_json::Value) -> serde_json::Value {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub fn validate_dataset_jsonld(jsonld: serde_json::Value) -> bool {
|
||||
if let Ok(_) = jsonschema::validate(&get_dataset_schema(), &jsonld) {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
|
|
|||
22
src/main.rs
22
src/main.rs
|
|
@ -1,4 +1,5 @@
|
|||
use anyhow::{Result, bail};
|
||||
use ckan_geoconnex_bulk_runner::jsonld::construct_dataset_jsonld_from_metadata;
|
||||
|
||||
// TODO: Ensure error output is only streamed to stderr as per Geoconnex docs
|
||||
|
||||
|
|
@ -10,9 +11,15 @@ async fn main() -> Result<()> {
|
|||
|
||||
// Paginate through /api/3/action/package_list until only an empty array is returned
|
||||
let mut offset = 0;
|
||||
let limit = 100;
|
||||
loop {
|
||||
// TODO: Verify that only public datasets are returned, otherwise consider /package_search
|
||||
let response = ckan.package_list().offset(offset).limit(100).call().await?;
|
||||
// TODO: Verify that only public datasets are returned
|
||||
let response = ckan
|
||||
.package_list()
|
||||
.offset(offset)
|
||||
.limit(limit)
|
||||
.call()
|
||||
.await?;
|
||||
// Verify successful response from CKAN API
|
||||
let Some(success_opt) = response.get("success") else {
|
||||
bail!("CKAN API did not return `success` key. Full response: {response}");
|
||||
|
|
@ -33,18 +40,17 @@ async fn main() -> Result<()> {
|
|||
} else {
|
||||
// For each dataset in current pagination:
|
||||
for dataset_name in dataset_names {
|
||||
// 0. Get the dataset name as a string
|
||||
let dataset_name_str = dataset_name.as_str().unwrap();
|
||||
println!("{dataset_name_str}");
|
||||
// TODO: Identify if dataset names are unique
|
||||
// 1. Get the dataset's metadata with /package_show by using the dataset name as the id
|
||||
// TODO: Identify if dataset names are unique
|
||||
let dataset_metadata = ckan
|
||||
.package_show()
|
||||
.id(dataset_name_str.to_string())
|
||||
.id(dataset_name.as_str().unwrap().to_string())
|
||||
.call()
|
||||
.await?;
|
||||
println!("{dataset_metadata:#?}");
|
||||
// 2. Construct JSON-LD based on the data from /package_show
|
||||
let jsonld = construct_dataset_jsonld_from_metadata(dataset_metadata);
|
||||
println!("{jsonld:#?}");
|
||||
// 3. Validate the JSON-LD against the dataset JSON schema
|
||||
// 4. Print the JSON-LD on a new line to stdout
|
||||
}
|
||||
|
|
@ -52,7 +58,7 @@ async fn main() -> Result<()> {
|
|||
} else {
|
||||
bail!("CKAN API returned {{\"success\": false\"}}. Full response: {response}");
|
||||
}
|
||||
offset = offset + 100;
|
||||
offset = offset + limit;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue