mirror of
https://github.com/dathere/ckan_geoconnex_bulk_runner.git
synced 2026-07-05 23:22:20 +00:00
Compare commits
No commits in common. "main" and "0.0.0" have entirely different histories.
7 changed files with 34 additions and 82 deletions
33
.github/workflows/container.yml
vendored
33
.github/workflows/container.yml
vendored
|
|
@ -1,33 +0,0 @@
|
||||||
name: Publish bulk loader Docker container
|
|
||||||
|
|
||||||
on:
|
|
||||||
workflow_dispatch:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
build_and_push:
|
|
||||||
runs-on: ubuntu-latest
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
namespace: [New_Mexico_Water_Data_Catalog]
|
|
||||||
steps:
|
|
||||||
- name: Login to GitHub Container Registry
|
|
||||||
uses: docker/login-action@v4
|
|
||||||
with:
|
|
||||||
registry: ghcr.io
|
|
||||||
username: ${{ github.actor }}
|
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
|
||||||
uses: docker/setup-buildx-action@v4
|
|
||||||
|
|
||||||
- name: Build and push
|
|
||||||
uses: docker/build-push-action@v7
|
|
||||||
with:
|
|
||||||
file: ./bulk_loader/Dockerfile
|
|
||||||
push: true
|
|
||||||
tags: ghcr.io/dathere/ckan_geoconnex_bulk_runner:${{ matrix.namespace }}
|
|
||||||
cache-from: type=gha,scope=ckan_geoconnex_bulk_runner
|
|
||||||
platforms: linux/amd64
|
|
||||||
cache-to: type=gha,mode=max,scope=ckan_geoconnex_bulk_runner
|
|
||||||
build-args: |
|
|
||||||
NAMESPACE=${{ matrix.namespace }}
|
|
||||||
12
.github/workflows/release.yml
vendored
12
.github/workflows/release.yml
vendored
|
|
@ -2,15 +2,11 @@ name: Publish CKAN-Geoconnex JSONL file as latest release
|
||||||
on:
|
on:
|
||||||
release:
|
release:
|
||||||
types: [published]
|
types: [published]
|
||||||
workflow_dispatch:
|
|
||||||
permissions:
|
permissions:
|
||||||
# To upload to releases
|
# To upload to releases
|
||||||
contents: write
|
contents: write
|
||||||
jobs:
|
jobs:
|
||||||
publish:
|
publish:
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
ckan_instance: [{ name: New_Mexico_Water_Data_Catalog, url: https://catalog.newmexicowaterdata.org, token: NMWDC_API_BULK_LOADER_TOKEN }]
|
|
||||||
name: Publish JSONL file
|
name: Publish JSONL file
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
|
@ -24,10 +20,8 @@ jobs:
|
||||||
- name: Run generate_release crate and upload JSONL file
|
- name: Run generate_release crate and upload JSONL file
|
||||||
run: |
|
run: |
|
||||||
cd ${{github.workspace}}
|
cd ${{github.workspace}}
|
||||||
cargo run -p generate_release --release --verbose > ${{ matrix.ckan_instance.name }}.jsonl
|
cargo run -p generate_release --release --verbose > ckan-geoconnex-web-resources.jsonl
|
||||||
gh release upload ${{github.event.release.tag_name}} ${{ matrix.ckan_instance.name }}.jsonl
|
gh release upload ${{github.event.release.tag_name}} ckan-geoconnex-web-resources.jsonl
|
||||||
env:
|
env:
|
||||||
GITHUB_TOKEN: ${{ github.TOKEN }}
|
GITHUB_TOKEN: ${{ github.TOKEN }}
|
||||||
NAMESPACE: ${{ matrix.ckan_instance.name }}
|
NMWDC_API_BULK_LOADER_TOKEN: ${{ secrets.NMWDC_API_BULK_LOADER_TOKEN }}
|
||||||
INSTANCE_URL: ${{ matrix.ckan_instance.url }}
|
|
||||||
API_TOKEN: ${{ secrets[matrix.ckan_instance.token] }}
|
|
||||||
|
|
|
||||||
|
|
@ -2,12 +2,8 @@ FROM rust:1.96 AS builder
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
RUN rustup set profile minimal
|
RUN rustup set profile minimal
|
||||||
COPY . .
|
COPY . .
|
||||||
ARG NAMESPACE
|
RUN cargo build --release
|
||||||
ENV NAMESPACE=$NAMESPACE
|
|
||||||
RUN cargo build -p bulk_loader --release
|
|
||||||
|
|
||||||
FROM ubuntu:latest
|
FROM ubuntu:latest
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates
|
|
||||||
RUN update-ca-certificates
|
|
||||||
COPY --from=builder /app/target/release/bulk_loader /
|
COPY --from=builder /app/target/release/bulk_loader /
|
||||||
ENTRYPOINT ["/bulk_loader"]
|
ENTRYPOINT ["/bulk_loader"]
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,9 @@ use anyhow::Result;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
// Get the CKAN instance's Geoconnex namespace to filter for its JSON-LD data
|
|
||||||
let namespace = std::env!("NAMESPACE");
|
|
||||||
// Get latest release data which is organized as a single JSONL file
|
// Get latest release data which is organized as a single JSONL file
|
||||||
// at https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest
|
// at https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest
|
||||||
let body = reqwest::get(format!("https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest/download/{namespace}.jsonl"))
|
let body = reqwest::get("https://github.com/dathere/ckan_geoconnex_bulk_runner/releases/latest/download/ckan-geoconnex-web-resources.jsonl")
|
||||||
.await?
|
.await?
|
||||||
.text()
|
.text()
|
||||||
.await?;
|
.await?;
|
||||||
|
|
|
||||||
|
|
@ -6,25 +6,25 @@ use pyo3::prelude::*;
|
||||||
mod ckan_geoconnex_bulk_runner_py {
|
mod ckan_geoconnex_bulk_runner_py {
|
||||||
use pyo3::{exceptions::PyException, prelude::*};
|
use pyo3::{exceptions::PyException, prelude::*};
|
||||||
|
|
||||||
// #[pyfunction]
|
#[pyfunction]
|
||||||
// Construct Geoconnex-compatible JSON-LD as a string from dataset metadata.
|
/// Construct Geoconnex-compatible JSON-LD as a string from dataset metadata.
|
||||||
//
|
///
|
||||||
// Input: Dataset metadata (output of /package_show for a CKAN dataset) as a string.
|
/// Input: Dataset metadata (output of /package_show for a CKAN dataset) as a string.
|
||||||
// Output: Constructed Geoconnex-compatible JSON-LD as a string.
|
/// Output: Constructed Geoconnex-compatible JSON-LD as a string.
|
||||||
// fn construct_dataset_jsonld_from_metadata(dataset_metadata: String) -> PyResult<String> {
|
fn construct_dataset_jsonld_from_metadata(dataset_metadata: String) -> PyResult<String> {
|
||||||
// match serde_json::to_value(dataset_metadata) {
|
match serde_json::to_value(dataset_metadata) {
|
||||||
// Ok(dataset_json) => {
|
Ok(dataset_json) => {
|
||||||
// match geoconnex_utils::jsonld::construct_dataset_jsonld_from_metadata(dataset_json)
|
match geoconnex_utils::jsonld::construct_dataset_jsonld_from_metadata(dataset_json)
|
||||||
// {
|
{
|
||||||
// Ok(jsonld) => serde_json::to_string(&jsonld).map_err(|e| {
|
Ok(jsonld) => serde_json::to_string(&jsonld).map_err(|e| {
|
||||||
// PyException::new_err(format!(
|
PyException::new_err(format!(
|
||||||
// "Error when converting JSON-LD to string: {e}"
|
"Error when converting JSON-LD to string: {e}"
|
||||||
// ))
|
))
|
||||||
// }),
|
}),
|
||||||
// Err(e) => Err(PyException::new_err(e.to_string())),
|
Err(e) => Err(PyException::new_err(e.to_string())),
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
// Err(e) => Err(PyException::new_err(e.to_string())),
|
Err(e) => Err(PyException::new_err(e.to_string())),
|
||||||
// }
|
}
|
||||||
// }
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -4,14 +4,15 @@ use std::collections::HashMap;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
let namespace = env!("NAMESPACE");
|
// Identify required header data
|
||||||
let token = env!("API_TOKEN");
|
let Ok(nmwdc_token) = std::env::var("NMWDC_API_BULK_LOADER_TOKEN") else {
|
||||||
let instance_url = env!("INSTANCE_URL");
|
bail!("Could not find environment variable NMWDC_API_BULK_LOADER_TOKEN.");
|
||||||
|
};
|
||||||
let mut headers = HashMap::new();
|
let mut headers = HashMap::new();
|
||||||
headers.insert("x-geoconnex-runner".to_string(), token.to_string());
|
headers.insert("x-geoconnex-runner".to_string(), nmwdc_token);
|
||||||
|
|
||||||
let ckan = ckanaction::CKAN::builder()
|
let ckan = ckanaction::CKAN::builder()
|
||||||
.url(instance_url)
|
.url("https://catalog.newmexicowaterdata.org")
|
||||||
.headers(headers)
|
.headers(headers)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
|
@ -67,8 +68,6 @@ async fn main() -> Result<()> {
|
||||||
// 2. Construct JSON-LD based on the data from /package_show
|
// 2. Construct JSON-LD based on the data from /package_show
|
||||||
let jsonld = match construct_dataset_jsonld_from_metadata(
|
let jsonld = match construct_dataset_jsonld_from_metadata(
|
||||||
dataset_metadata.to_owned(),
|
dataset_metadata.to_owned(),
|
||||||
instance_url.to_string(),
|
|
||||||
namespace.to_string(),
|
|
||||||
) {
|
) {
|
||||||
Ok(j) => j,
|
Ok(j) => j,
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,6 @@ use serde_json::json;
|
||||||
|
|
||||||
pub fn construct_dataset_jsonld_from_metadata(
|
pub fn construct_dataset_jsonld_from_metadata(
|
||||||
dataset_metadata: serde_json::Value,
|
dataset_metadata: serde_json::Value,
|
||||||
instance_url: String,
|
|
||||||
namespace: String,
|
|
||||||
) -> Result<serde_json::Value> {
|
) -> Result<serde_json::Value> {
|
||||||
let dataset_id = dataset_metadata.get("id").unwrap().as_str().unwrap();
|
let dataset_id = dataset_metadata.get("id").unwrap().as_str().unwrap();
|
||||||
eprintln!("Attempting to construct JSON-LD for dataset {dataset_id}");
|
eprintln!("Attempting to construct JSON-LD for dataset {dataset_id}");
|
||||||
|
|
@ -62,14 +60,14 @@ pub fn construct_dataset_jsonld_from_metadata(
|
||||||
},
|
},
|
||||||
"@type": "Dataset",
|
"@type": "Dataset",
|
||||||
// TODO: Customize namespace based on CKAN instance being used
|
// TODO: Customize namespace based on CKAN instance being used
|
||||||
"@id": format!("https://geoconnex.us/ckan/{namespace}/{dataset_id}"),
|
"@id": format!("https://geoconnex.us/ckan/sandbox/{dataset_id}"),
|
||||||
"name": dataset_title,
|
"name": dataset_title,
|
||||||
"provider": {
|
"provider": {
|
||||||
"@type": "Organization",
|
"@type": "Organization",
|
||||||
"name": organization_name
|
"name": organization_name
|
||||||
},
|
},
|
||||||
// TODO: Customize CKAN instance URL based on CKAN instance being used
|
// TODO: Customize CKAN instance URL based on CKAN instance being used
|
||||||
"url": format!("{instance_url}/dataset/{dataset_id}")
|
"url": format!("https://sandbox.opendataportal.us/dataset/{dataset_id}")
|
||||||
});
|
});
|
||||||
let jsonld_map = jsonld.as_object_mut().unwrap();
|
let jsonld_map = jsonld.as_object_mut().unwrap();
|
||||||
if about.len() > 0 {
|
if about.len() > 0 {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue