mirror of
https://github.com/dathere/ckan_geoconnex_bulk_runner.git
synced 2026-07-05 23:22:20 +00:00
feat: multi-CKAN-instance compatibility
This commit is contained in:
parent
59564c9de3
commit
74c5185e23
6 changed files with 55 additions and 33 deletions
5
.github/workflows/container.yml
vendored
5
.github/workflows/container.yml
vendored
|
|
@ -6,6 +6,9 @@ on:
|
|||
jobs:
|
||||
build_and_push:
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
namespace: [New_Mexico_Water_Data_Catalog]
|
||||
steps:
|
||||
- name: Login to GitHub Container Registry
|
||||
uses: docker/login-action@v4
|
||||
|
|
@ -26,3 +29,5 @@ jobs:
|
|||
cache-from: type=gha,scope=ckan_geoconnex_bulk_runner
|
||||
platforms: linux/amd64
|
||||
cache-to: type=gha,mode=max,scope=ckan_geoconnex_bulk_runner
|
||||
env:
|
||||
NAMESPACE: ${{ matrix.namespace }}
|
||||
|
|
|
|||
18
.github/workflows/release.yml
vendored
18
.github/workflows/release.yml
vendored
|
|
@ -2,11 +2,21 @@ name: Publish CKAN-Geoconnex JSONL file as latest release
|
|||
on:
|
||||
release:
|
||||
types: [published]
|
||||
workflow_dispatch:
|
||||
permissions:
|
||||
# To upload to releases
|
||||
contents: write
|
||||
jobs:
|
||||
publish:
|
||||
strategy:
|
||||
matrix:
|
||||
ckan_instance: [
|
||||
{
|
||||
name: New_Mexico_Water_Data_Catalog,
|
||||
url: https://catalog.newmexicowaterdata.org,
|
||||
token: ${{ secrets.NMWDC_API_BULK_LOADER_TOKEN }}
|
||||
}
|
||||
]
|
||||
name: Publish JSONL file
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
|
@ -20,8 +30,10 @@ jobs:
|
|||
- name: Run generate_release crate and upload JSONL file
|
||||
run: |
|
||||
cd ${{github.workspace}}
|
||||
cargo run -p generate_release --release --verbose > ckan-geoconnex-web-resources.jsonl
|
||||
gh release upload ${{github.event.release.tag_name}} ckan-geoconnex-web-resources.jsonl
|
||||
cargo run -p generate_release --release --verbose > ckan-geoconnex-web-resources-${{ matrix.namespace }}.jsonl
|
||||
gh release upload ${{github.event.release.tag_name}} ckan-geoconnex-web-resources-${{ matrix.namespace }}.jsonl
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ github.TOKEN }}
|
||||
NMWDC_API_BULK_LOADER_TOKEN: ${{ secrets.NMWDC_API_BULK_LOADER_TOKEN }}
|
||||
NAMESPACE: ${{ matrix.namespace }}
|
||||
INSTANCE_URL: ${{ matrix.ckan_instance.url }}
|
||||
API_TOKEN: ${{ matrix.ckan_instance.token }}
|
||||
|
|
|
|||
|
|
@ -2,9 +2,11 @@ use anyhow::Result;
|
|||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
// Get the CKAN instance's Geoconnex namespace to filter for its JSON-LD data
|
||||
let namespace = std::env!("NAMESPACE");
|
||||
// Get latest release data which is organized as a single JSONL file
|
||||
// at https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest
|
||||
let body = reqwest::get("https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest/download/ckan-geoconnex-web-resources.jsonl")
|
||||
let body = reqwest::get(format!("https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest/download/ckan-geoconnex-web-resources-{namespace}.jsonl"))
|
||||
.await?
|
||||
.text()
|
||||
.await?;
|
||||
|
|
|
|||
|
|
@ -6,25 +6,25 @@ use pyo3::prelude::*;
|
|||
mod ckan_geoconnex_bulk_runner_py {
|
||||
use pyo3::{exceptions::PyException, prelude::*};
|
||||
|
||||
#[pyfunction]
|
||||
/// Construct Geoconnex-compatible JSON-LD as a string from dataset metadata.
|
||||
///
|
||||
/// Input: Dataset metadata (output of /package_show for a CKAN dataset) as a string.
|
||||
/// Output: Constructed Geoconnex-compatible JSON-LD as a string.
|
||||
fn construct_dataset_jsonld_from_metadata(dataset_metadata: String) -> PyResult<String> {
|
||||
match serde_json::to_value(dataset_metadata) {
|
||||
Ok(dataset_json) => {
|
||||
match geoconnex_utils::jsonld::construct_dataset_jsonld_from_metadata(dataset_json)
|
||||
{
|
||||
Ok(jsonld) => serde_json::to_string(&jsonld).map_err(|e| {
|
||||
PyException::new_err(format!(
|
||||
"Error when converting JSON-LD to string: {e}"
|
||||
))
|
||||
}),
|
||||
Err(e) => Err(PyException::new_err(e.to_string())),
|
||||
}
|
||||
}
|
||||
Err(e) => Err(PyException::new_err(e.to_string())),
|
||||
}
|
||||
}
|
||||
// #[pyfunction]
|
||||
// Construct Geoconnex-compatible JSON-LD as a string from dataset metadata.
|
||||
//
|
||||
// Input: Dataset metadata (output of /package_show for a CKAN dataset) as a string.
|
||||
// Output: Constructed Geoconnex-compatible JSON-LD as a string.
|
||||
// fn construct_dataset_jsonld_from_metadata(dataset_metadata: String) -> PyResult<String> {
|
||||
// match serde_json::to_value(dataset_metadata) {
|
||||
// Ok(dataset_json) => {
|
||||
// match geoconnex_utils::jsonld::construct_dataset_jsonld_from_metadata(dataset_json)
|
||||
// {
|
||||
// Ok(jsonld) => serde_json::to_string(&jsonld).map_err(|e| {
|
||||
// PyException::new_err(format!(
|
||||
// "Error when converting JSON-LD to string: {e}"
|
||||
// ))
|
||||
// }),
|
||||
// Err(e) => Err(PyException::new_err(e.to_string())),
|
||||
// }
|
||||
// }
|
||||
// Err(e) => Err(PyException::new_err(e.to_string())),
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,15 +4,14 @@ use std::collections::HashMap;
|
|||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
// Identify required header data
|
||||
let Ok(nmwdc_token) = std::env::var("NMWDC_API_BULK_LOADER_TOKEN") else {
|
||||
bail!("Could not find environment variable NMWDC_API_BULK_LOADER_TOKEN.");
|
||||
};
|
||||
let namespace = env!("NAMESPACE");
|
||||
let token = env!("API_TOKEN");
|
||||
let instance_url = env!("INSTANCE_URL");
|
||||
let mut headers = HashMap::new();
|
||||
headers.insert("x-geoconnex-runner".to_string(), nmwdc_token);
|
||||
headers.insert("x-geoconnex-runner".to_string(), token.to_string());
|
||||
|
||||
let ckan = ckanaction::CKAN::builder()
|
||||
.url("https://catalog.newmexicowaterdata.org")
|
||||
.url(instance_url)
|
||||
.headers(headers)
|
||||
.build();
|
||||
|
||||
|
|
@ -68,6 +67,8 @@ async fn main() -> Result<()> {
|
|||
// 2. Construct JSON-LD based on the data from /package_show
|
||||
let jsonld = match construct_dataset_jsonld_from_metadata(
|
||||
dataset_metadata.to_owned(),
|
||||
instance_url.to_string(),
|
||||
namespace.to_string(),
|
||||
) {
|
||||
Ok(j) => j,
|
||||
Err(e) => {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ use serde_json::json;
|
|||
|
||||
pub fn construct_dataset_jsonld_from_metadata(
|
||||
dataset_metadata: serde_json::Value,
|
||||
instance_url: String,
|
||||
namespace: String,
|
||||
) -> Result<serde_json::Value> {
|
||||
let dataset_id = dataset_metadata.get("id").unwrap().as_str().unwrap();
|
||||
eprintln!("Attempting to construct JSON-LD for dataset {dataset_id}");
|
||||
|
|
@ -60,14 +62,14 @@ pub fn construct_dataset_jsonld_from_metadata(
|
|||
},
|
||||
"@type": "Dataset",
|
||||
// The Geoconnex namespace is passed in by the caller, so the @id follows the CKAN instance being used
|
||||
"@id": format!("https://geoconnex.us/ckan/nmwdh/{dataset_id}"),
|
||||
"@id": format!("https://geoconnex.us/ckan/{namespace}/{dataset_id}"),
|
||||
"name": dataset_title,
|
||||
"provider": {
|
||||
"@type": "Organization",
|
||||
"name": organization_name
|
||||
},
|
||||
// The CKAN instance URL is passed in by the caller, so the dataset link follows the CKAN instance being used
|
||||
"url": format!("https://catalog.newmexicowaterdata.org/dataset/{dataset_id}")
|
||||
"url": format!("{instance_url}/dataset/{dataset_id}")
|
||||
});
|
||||
let jsonld_map = jsonld.as_object_mut().unwrap();
|
||||
if about.len() > 0 {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue