diff --git a/.github/workflows/container.yml b/.github/workflows/container.yml deleted file mode 100644 index 459686d..0000000 --- a/.github/workflows/container.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Publish bulk loader Docker container - -on: - workflow_dispatch: - -jobs: - build_and_push: - runs-on: ubuntu-latest - strategy: - matrix: - namespace: [New_Mexico_Water_Data_Catalog] - steps: - - name: Login to GitHub Container Registry - uses: docker/login-action@v4 - with: - registry: ghcr.io - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v4 - - - name: Build and push - uses: docker/build-push-action@v7 - with: - file: ./bulk_loader/Dockerfile - push: true - tags: ghcr.io/dathere/ckan_geoconnex_bulk_runner:${{ matrix.namespace }} - cache-from: type=gha,scope=ckan_geoconnex_bulk_runner - platforms: linux/amd64 - cache-to: type=gha,mode=max,scope=ckan_geoconnex_bulk_runner - build-args: | - NAMESPACE=${{ matrix.namespace }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 39b972d..3b91bcb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,15 +2,11 @@ name: Publish CKAN-Geoconnex JSONL file as latest release on: release: types: [published] - workflow_dispatch: permissions: # To upload to releases contents: write jobs: publish: - strategy: - matrix: - ckan_instance: [{ name: New_Mexico_Water_Data_Catalog, url: https://catalog.newmexicowaterdata.org, token: NMWDC_API_BULK_LOADER_TOKEN }] name: Publish JSONL file runs-on: ubuntu-latest steps: @@ -24,10 +20,8 @@ jobs: - name: Run generate_release crate and upload JSONL file run: | cd ${{github.workspace}} - cargo run -p generate_release --release --verbose > ${{ matrix.ckan_instance.name }}.jsonl - gh release upload ${{github.event.release.tag_name}} ${{ matrix.ckan_instance.name }}.jsonl + cargo run -p generate_release --release --verbose > 
ckan-geoconnex-web-resources.jsonl + gh release upload ${{github.event.release.tag_name}} ckan-geoconnex-web-resources.jsonl env: GITHUB_TOKEN: ${{ github.TOKEN }} - NAMESPACE: ${{ matrix.ckan_instance.name }} - INSTANCE_URL: ${{ matrix.ckan_instance.url }} - API_TOKEN: ${{ secrets[matrix.ckan_instance.token] }} + NMWDC_API_BULK_LOADER_TOKEN: ${{ secrets.NMWDC_API_BULK_LOADER_TOKEN }} diff --git a/bulk_loader/Dockerfile b/bulk_loader/Dockerfile index ebe410d..9b049ec 100644 --- a/bulk_loader/Dockerfile +++ b/bulk_loader/Dockerfile @@ -2,12 +2,8 @@ FROM rust:1.96 AS builder WORKDIR /app RUN rustup set profile minimal COPY . . -ARG NAMESPACE -ENV NAMESPACE=$NAMESPACE -RUN cargo build -p bulk_loader --release +RUN cargo build --release FROM ubuntu:latest -RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates -RUN update-ca-certificates COPY --from=builder /app/target/release/bulk_loader / ENTRYPOINT ["/bulk_loader"] diff --git a/bulk_loader/src/main.rs b/bulk_loader/src/main.rs index d36133f..d84216f 100644 --- a/bulk_loader/src/main.rs +++ b/bulk_loader/src/main.rs @@ -2,11 +2,9 @@ use anyhow::Result; #[tokio::main] async fn main() -> Result<()> { - // Get the CKAN instance's Geoconnex namespace to filter for its JSON-LD data - let namespace = std::env!("NAMESPACE"); // Get latest release data which is organized as a single JSONL file // at https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest - let body = reqwest::get(format!("https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest/download/{namespace}.jsonl")) + let body = reqwest::get("https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest/download/ckan-geoconnex-web-resources.jsonl") .await? 
.text() .await?; diff --git a/ckan_geoconnex_bulk_runner_py/src/lib.rs b/ckan_geoconnex_bulk_runner_py/src/lib.rs index 26c537f..76a45d4 100644 --- a/ckan_geoconnex_bulk_runner_py/src/lib.rs +++ b/ckan_geoconnex_bulk_runner_py/src/lib.rs @@ -6,25 +6,25 @@ use pyo3::prelude::*; mod ckan_geoconnex_bulk_runner_py { use pyo3::{exceptions::PyException, prelude::*}; - // #[pyfunction] - // Construct Geoconnex-compatible JSON-LD as a string from dataset metadata. - // - // Input: Dataset metadata (output of /package_show for a CKAN dataset) as a string. - // Output: Constructed Geoconnex-compatible JSON-LD as a string. - // fn construct_dataset_jsonld_from_metadata(dataset_metadata: String) -> PyResult { - // match serde_json::to_value(dataset_metadata) { - // Ok(dataset_json) => { - // match geoconnex_utils::jsonld::construct_dataset_jsonld_from_metadata(dataset_json) - // { - // Ok(jsonld) => serde_json::to_string(&jsonld).map_err(|e| { - // PyException::new_err(format!( - // "Error when converting JSON-LD to string: {e}" - // )) - // }), - // Err(e) => Err(PyException::new_err(e.to_string())), - // } - // } - // Err(e) => Err(PyException::new_err(e.to_string())), - // } - // } + #[pyfunction] + /// Construct Geoconnex-compatible JSON-LD as a string from dataset metadata. + /// + /// Input: Dataset metadata (output of /package_show for a CKAN dataset) as a string. + /// Output: Constructed Geoconnex-compatible JSON-LD as a string. 
+ fn construct_dataset_jsonld_from_metadata(dataset_metadata: String) -> PyResult<String> { + match serde_json::from_str::<serde_json::Value>(&dataset_metadata) { // parse the JSON text; `to_value` would only wrap the whole string in `Value::String` + Ok(dataset_json) => { + match geoconnex_utils::jsonld::construct_dataset_jsonld_from_metadata(dataset_json) + { + Ok(jsonld) => serde_json::to_string(&jsonld).map_err(|e| { + PyException::new_err(format!( + "Error when converting JSON-LD to string: {e}" + )) + }), + Err(e) => Err(PyException::new_err(e.to_string())), + } + } + Err(e) => Err(PyException::new_err(e.to_string())), + } + } } diff --git a/generate_release/src/main.rs b/generate_release/src/main.rs index 944d17e..db2abf8 100644 --- a/generate_release/src/main.rs +++ b/generate_release/src/main.rs @@ -4,14 +4,15 @@ use std::collections::HashMap; #[tokio::main] async fn main() -> Result<()> { - let namespace = env!("NAMESPACE"); - let token = env!("API_TOKEN"); - let instance_url = env!("INSTANCE_URL"); + // Identify required header data + let Ok(nmwdc_token) = std::env::var("NMWDC_API_BULK_LOADER_TOKEN") else { + bail!("Could not find environment variable NMWDC_API_BULK_LOADER_TOKEN."); + }; let mut headers = HashMap::new(); - headers.insert("x-geoconnex-runner".to_string(), token.to_string()); + headers.insert("x-geoconnex-runner".to_string(), nmwdc_token); let ckan = ckanaction::CKAN::builder() - .url(instance_url) + .url("https://catalog.newmexicowaterdata.org") .headers(headers) .build(); @@ -67,8 +68,6 @@ async fn main() -> Result<()> { // 2. 
Construct JSON-LD based on the data from /package_show let jsonld = match construct_dataset_jsonld_from_metadata( dataset_metadata.to_owned(), - instance_url.to_string(), - namespace.to_string(), ) { Ok(j) => j, Err(e) => { diff --git a/geoconnex_utils/src/jsonld.rs b/geoconnex_utils/src/jsonld.rs index f28429b..0cbddb9 100644 --- a/geoconnex_utils/src/jsonld.rs +++ b/geoconnex_utils/src/jsonld.rs @@ -3,8 +3,6 @@ use serde_json::json; pub fn construct_dataset_jsonld_from_metadata( dataset_metadata: serde_json::Value, - instance_url: String, - namespace: String, ) -> Result { let dataset_id = dataset_metadata.get("id").unwrap().as_str().unwrap(); eprintln!("Attempting to construct JSON-LD for dataset {dataset_id}"); @@ -62,14 +60,14 @@ pub fn construct_dataset_jsonld_from_metadata( }, "@type": "Dataset", // TODO: Customize namespace based on CKAN instance being used - "@id": format!("https://geoconnex.us/ckan/{namespace}/{dataset_id}"), + "@id": format!("https://geoconnex.us/ckan/sandbox/{dataset_id}"), "name": dataset_title, "provider": { "@type": "Organization", "name": organization_name }, // TODO: Customize CKAN instance URL based on CKAN instance being used - "url": format!("{instance_url}/dataset/{dataset_id}") + "url": format!("https://sandbox.opendataportal.us/dataset/{dataset_id}") }); let jsonld_map = jsonld.as_object_mut().unwrap(); if about.len() > 0 {