mirror of
https://github.com/dathere/ckan_geoconnex_bulk_runner.git
synced 2026-07-05 15:12:20 +00:00
feat: initial scaffolding for ckan_geoconnex_bulk_runner
This commit is contained in:
commit
71370c3d7c
6 changed files with 2401 additions and 0 deletions
2328
Cargo.lock
generated
Normal file
2328
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
11
Cargo.toml
Normal file
11
Cargo.toml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "ckan_geoconnex_bulk_runner"
|
||||
version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.102"
|
||||
ckanaction = "0.1.4"
|
||||
jsonschema = "0.46.4"
|
||||
serde_json = "1.0.149"
|
||||
tokio = { version = "1.52.1", features = ["full"] }
|
||||
15
README.md
Normal file
15
README.md
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
# ckan_geoconnex_bulk_runner
|
||||
|
||||
> Status: This codebase is currently a work in progress and more documentation is planned.
|
||||
|
||||
The `ckan_geoconnex_bulk_runner` codebase is meant to run as a container for a bulk integration of a [CKAN](https://ckan.org) instance's relevant datasets and locations (e.g. for water data hubs) into the [Geoconnex](https://geoconnex.us) knowledge graph. The program writes JSON-LD to standard output, one line per approved dataset/location, and the Geoconnex crawler then uses that output to update the Geoconnex knowledge graph.
|
||||
|
||||
Refer to "The Geoconnex Bulk Namespace" documentation here for more information: https://github.com/internetofwater/geoconnex.us/tree/master/namespaces/bulk
|
||||
|
||||
This runner is expected to be implemented for a water data hub with [DataPusher+](https://github.com/dathere/datapusher-plus) enabled. For questions reach out to [datHere](https://dathere.com), [Center for Geospatial Solutions](https://cgsearth.org/), or add an issue/discussion.
|
||||
|
||||
## Installation and setup
|
||||
|
||||
```bash
|
||||
cargo run --release
|
||||
```
|
||||
2
config.toml
Normal file
2
config.toml
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
ckan_url = "https://data.dathere.com"
|
||||
geoconnex_about_field_name = "geoconnex_about"
|
||||
31
src/main.rs
Normal file
31
src/main.rs
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
mod utils;
|
||||
|
||||
use anyhow::Result;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let ckan = ckanaction::CKAN::builder()
|
||||
.url("https://data.dathere.com")
|
||||
.build();
|
||||
// TODO: Paginate through package_list and run package_show for each package
|
||||
// If about exists then construct JSON-LD and validate then output JSON-LD to stdout on a new line
|
||||
let response = ckan.package_show().id("".to_string()).call().await?;
|
||||
|
||||
let result = response
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.get("result")
|
||||
.unwrap()
|
||||
.as_object()
|
||||
.unwrap();
|
||||
if let Some(geoconnex_about) = result.get("geoconnex_about") {
|
||||
// Check if at least one valid reference feature exists in dataset metadata
|
||||
}
|
||||
// TODO: Construct JSON-LD if valid `about`
|
||||
// TODO: Validate constructed JSON-LD against JSON schema
|
||||
// TODO: Print JSON-LD to new line
|
||||
|
||||
println!("{result:#?}");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
14
src/utils.rs
Normal file
14
src/utils.rs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
use serde_json::json;
|
||||
|
||||
/// Builds the JSON-LD document for one dataset from its CKAN `metadata`.
///
/// Not implemented yet: the previous empty body evaluated to `()` and did not
/// type-check against the declared `serde_json::Value` return type, so the
/// stub is made explicit instead.
///
/// # Panics
///
/// Always panics via `todo!()` until the construction logic is written.
// `metadata` is unused only because this is still a stub.
#[allow(unused_variables)]
async fn construct_jsonld(metadata: serde_json::Value) -> serde_json::Value {
    todo!("construct JSON-LD from CKAN dataset metadata")
}
|
||||
|
||||
async fn get_dataset_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@context": {"type": ["string", "object"]},
|
||||
"@type": {"const": "Dataset"}
|
||||
},
|
||||
"required": []
|
||||
})
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue