diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml index 52eaba2..8401d2a 100644 --- a/.github/workflows/container.yml +++ b/.github/workflows/container.yml @@ -6,6 +6,9 @@ on: jobs: build_and_push: runs-on: ubuntu-latest + strategy: + matrix: + namespace: [New_Mexico_Water_Data_Catalog] steps: - name: Login to GitHub Container Registry uses: docker/login-action@v4 @@ -26,3 +29,5 @@ jobs: cache-from: type=gha,scope=ckan_geoconnex_bulk_runner platforms: linux/amd64 cache-to: type=gha,mode=max,scope=ckan_geoconnex_bulk_runner + env: + NAMESPACE: ${{ matrix.namespace }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3b91bcb..5124011 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,11 +2,21 @@ name: Publish CKAN-Geoconnex JSONL file as latest release on: release: types: [published] + workflow_dispatch: permissions: # To upload to releases contents: write jobs: publish: + strategy: + matrix: + ckan_instance: [ + { + name: New_Mexico_Water_Data_Catalog, + url: https://catalog.newmexicowaterdata.org, + token: ${{ secrets.NMWDC_API_BULK_LOADER_TOKEN }} + } + ] name: Publish JSONL file runs-on: ubuntu-latest steps: @@ -20,8 +30,10 @@ jobs: - name: Run generate_release crate and upload JSONL file run: | cd ${{github.workspace}} - cargo run -p generate_release --release --verbose > ckan-geoconnex-web-resources.jsonl - gh release upload ${{github.event.release.tag_name}} ckan-geoconnex-web-resources.jsonl + cargo run -p generate_release --release --verbose > ckan-geoconnex-web-resources-${{ matrix.namespace }}.jsonl + gh release upload ${{github.event.release.tag_name}} ckan-geoconnex-web-resources-${{ matrix.namespace }}.jsonl env: GITHUB_TOKEN: ${{ github.TOKEN }} - NMWDC_API_BULK_LOADER_TOKEN: ${{ secrets.NMWDC_API_BULK_LOADER_TOKEN }} + NAMESPACE: ${{ matrix.namespace }} + INSTANCE_URL: ${{ matrix.ckan_instance.url }} + API_TOKEN: ${{ matrix.ckan_instance.token }} diff --git a/bulk_loader/src/main.rs b/bulk_loader/src/main.rs index d84216f..2073401 100644 --- a/bulk_loader/src/main.rs +++ b/bulk_loader/src/main.rs @@ -2,9 +2,11 @@ use anyhow::Result; #[tokio::main] async fn main() -> Result<()> { + // Get the CKAN instance's Geoconnex namespace to filter for its JSON-LD data + let namespace = std::env!("NAMESPACE"); // Get latest release data which is organized as a single JSONL file // at https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest - let body = reqwest::get("https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest/download/ckan-geoconnex-web-resources.jsonl") + let body = reqwest::get(format!("https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest/download/ckan-geoconnex-web-resources-{namespace}.jsonl")) .await? .text() .await?; diff --git a/ckan_geoconnex_bulk_runner_py/src/lib.rs b/ckan_geoconnex_bulk_runner_py/src/lib.rs index 76a45d4..26c537f 100644 --- a/ckan_geoconnex_bulk_runner_py/src/lib.rs +++ b/ckan_geoconnex_bulk_runner_py/src/lib.rs @@ -6,25 +6,25 @@ use pyo3::prelude::*; mod ckan_geoconnex_bulk_runner_py { use pyo3::{exceptions::PyException, prelude::*}; - #[pyfunction] - /// Construct Geoconnex-compatible JSON-LD as a string from dataset metadata. - /// - /// Input: Dataset metadata (output of /package_show for a CKAN dataset) as a string. - /// Output: Constructed Geoconnex-compatible JSON-LD as a string. - fn construct_dataset_jsonld_from_metadata(dataset_metadata: String) -> PyResult { - match serde_json::to_value(dataset_metadata) { - Ok(dataset_json) => { - match geoconnex_utils::jsonld::construct_dataset_jsonld_from_metadata(dataset_json) - { - Ok(jsonld) => serde_json::to_string(&jsonld).map_err(|e| { - PyException::new_err(format!( - "Error when converting JSON-LD to string: {e}" - )) - }), - Err(e) => Err(PyException::new_err(e.to_string())), - } - } - Err(e) => Err(PyException::new_err(e.to_string())), - } - } + // #[pyfunction] + // Construct Geoconnex-compatible JSON-LD as a string from dataset metadata. + // + // Input: Dataset metadata (output of /package_show for a CKAN dataset) as a string. + // Output: Constructed Geoconnex-compatible JSON-LD as a string. + // fn construct_dataset_jsonld_from_metadata(dataset_metadata: String) -> PyResult { + // match serde_json::to_value(dataset_metadata) { + // Ok(dataset_json) => { + // match geoconnex_utils::jsonld::construct_dataset_jsonld_from_metadata(dataset_json) + // { + // Ok(jsonld) => serde_json::to_string(&jsonld).map_err(|e| { + // PyException::new_err(format!( + // "Error when converting JSON-LD to string: {e}" + // )) + // }), + // Err(e) => Err(PyException::new_err(e.to_string())), + // } + // } + // Err(e) => Err(PyException::new_err(e.to_string())), + // } + // } } diff --git a/generate_release/src/main.rs b/generate_release/src/main.rs index db2abf8..944d17e 100644 --- a/generate_release/src/main.rs +++ b/generate_release/src/main.rs @@ -4,15 +4,14 @@ use std::collections::HashMap; #[tokio::main] async fn main() -> Result<()> { - // Identify required header data - let Ok(nmwdc_token) = std::env::var("NMWDC_API_BULK_LOADER_TOKEN") else { - bail!("Could not find environment variable NMWDC_API_BULK_LOADER_TOKEN."); - }; + let namespace = env!("NAMESPACE"); + let token = env!("API_TOKEN"); + let instance_url = env!("INSTANCE_URL"); let mut headers = HashMap::new(); - headers.insert("x-geoconnex-runner".to_string(), nmwdc_token); + headers.insert("x-geoconnex-runner".to_string(), token.to_string()); let ckan = ckanaction::CKAN::builder() - .url("https://catalog.newmexicowaterdata.org") + .url(instance_url) .headers(headers) .build(); @@ -68,6 +67,8 @@ async fn main() -> Result<()> { // 2. Construct JSON-LD based on the data from /package_show let jsonld = match construct_dataset_jsonld_from_metadata( dataset_metadata.to_owned(), + instance_url.to_string(), + namespace.to_string(), ) { Ok(j) => j, Err(e) => { diff --git a/geoconnex_utils/src/jsonld.rs b/geoconnex_utils/src/jsonld.rs index 5b459e8..f28429b 100644 --- a/geoconnex_utils/src/jsonld.rs +++ b/geoconnex_utils/src/jsonld.rs @@ -3,6 +3,8 @@ use serde_json::json; pub fn construct_dataset_jsonld_from_metadata( dataset_metadata: serde_json::Value, + instance_url: String, + namespace: String, ) -> Result { let dataset_id = dataset_metadata.get("id").unwrap().as_str().unwrap(); eprintln!("Attempting to construct JSON-LD for dataset {dataset_id}"); @@ -60,14 +62,14 @@ pub fn construct_dataset_jsonld_from_metadata( }, "@type": "Dataset", // TODO: Customize namespace based on CKAN instance being used - "@id": format!("https://geoconnex.us/ckan/nmwdh/{dataset_id}"), + "@id": format!("https://geoconnex.us/ckan/{namespace}/{dataset_id}"), "name": dataset_title, "provider": { "@type": "Organization", "name": organization_name }, // TODO: Customize CKAN instance URL based on CKAN instance being used - "url": format!("https://catalog.newmexicowaterdata.org/dataset/{dataset_id}") + "url": format!("{instance_url}/dataset/{dataset_id}") }); let jsonld_map = jsonld.as_object_mut().unwrap(); if about.len() > 0 {