feat: enhanced cargo workspace, NM usage, Dockerfile

This commit is contained in:
rzmk 2026-06-15 11:20:51 -04:00
parent 71b08a53f0
commit 3a79fb2b0a
18 changed files with 362 additions and 2478 deletions

View file

@ -0,0 +1,77 @@
use anyhow::{Result, bail};
use serde_json::json;
pub fn construct_dataset_jsonld_from_metadata(
dataset_metadata: serde_json::Value,
) -> Result<serde_json::Value> {
let dataset_id = dataset_metadata.get("id").unwrap().as_str().unwrap();
eprintln!("Attempting to construct JSON-LD for dataset {dataset_id}");
let dataset_title = dataset_metadata.get("title").unwrap().as_str().unwrap();
let organization_name = dataset_metadata
.get("organization")
.unwrap()
.get("title")
.unwrap();
// TODO: Align and include Geoconnex PIDs for reference feature categories to extract PIDs from them
// Then also convert spatial_full FeatureCollection to Multipolygon if needed for gsp:hasGeometry when there are
// also non-reference feature polygons
let mut about = vec![];
if let Some(spatial_full) = dataset_metadata.get("spatial_full") {
let Some(spatial_full_str) = spatial_full.as_str() else {
bail!("Could not parse spatial_full as string.");
};
if !spatial_full_str.is_empty() {
let Ok(spatial_full_json) = serde_json::from_str::<serde_json::Value>(spatial_full_str)
else {
bail!(
"Error while attempting to deserialize spatial_full string to serde_json::Value."
);
};
let Some(features_value) = spatial_full_json.get("features") else {
bail!("Error while attempting to get value of features from spatial_full GeoJSON.");
};
let Some(features) = features_value.as_array() else {
bail!(
"Error while attempting to take features value as array from spatial_full GeoJSON."
);
};
for feature in features {
let Some(properties) = feature.get("properties") else {
bail!(
"Error while attempting to get properties from features from spatial_full GeoJSON."
);
};
if let Some(pid) = properties.get("pid") {
let Some(pid_string) = pid.as_str() else {
bail!("Error while attempting to convert PID as str from &Value.");
};
about.push(json!({
"@id": pid_string,
"@type": "Place"
}));
}
}
}
}
let mut jsonld = json!({
"@context": {
"@vocab": "https://schema.org/",
"gsp": "http://www.opengis.net/ont/geosparql#",
},
"@type": "Dataset",
// TODO: Customize namespace based on CKAN instance being used
"@id": format!("https://geoconnex.us/ckan/sandbox/{dataset_id}"),
"name": dataset_title,
"provider": {
"@type": "Organization",
"name": organization_name
},
// TODO: Customize CKAN instance URL based on CKAN instance being used
"url": format!("https://sandbox.opendataportal.us/dataset/{dataset_id}")
});
let jsonld_map = jsonld.as_object_mut().unwrap();
if about.len() > 0 {
jsonld_map.insert("about".to_string(), serde_json::to_value(about).unwrap());
}
Ok(serde_json::to_value(jsonld_map).unwrap())
}

View file

@ -0,0 +1,2 @@
pub mod jsonld;
pub mod schema;

View file

@ -0,0 +1,76 @@
use serde_json::json;
/// Returns the JSON Schema document describing a `Dataset` JSON-LD object.
///
/// A document must carry `@context`, `@type` and `@id`, a name and provider
/// (either plain or `schema:`-prefixed), and at least one of `about` or
/// `gsp:hasGeometry`. When the crate is built with the `local` feature, one
/// additional alternative is accepted that requires neither `about` nor
/// `gsp:hasGeometry` (prefixed `schema:name` / `schema:provider` form only).
pub fn get_dataset_schema() -> serde_json::Value {
    // `mut` is only exercised when the "local" feature is enabled.
    #[allow(unused_mut)]
    let mut dataset_schema = json!({
        "type": "object",
        "properties": {
            "@context": {"type": ["string", "object"]},
            "@type": {"type": ["string", "array"], "contains": {"anyOf": [{"const": "Dataset"}, {"const": "schema:Dataset"}]}},
            "@id": {"type": "string"},
            "name": {"type": "string"},
            "schema:name": {"type": "string"},
            "provider": {
                "type": "object",
                "properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
            },
            "schema:provider": {
                "type": "object",
                "properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
            },
            // Uses the JSON Schema keyword `type`; `@type` is not a schema keyword
            // and would leave this property unconstrained.
            "gsp:hasGeometry": {"type": "object"},
            "about": {
                "type": ["string", "array"],
                "items": {
                    "type": "object",
                    "properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
                },
                "minItems": 1
            },
        },
        "anyOf": [
            { "required": ["@context", "@type", "@id", "name", "provider", "about"] },
            { "required": ["@context", "@type", "@id", "name", "provider", "gsp:hasGeometry"] },
            { "required": ["@context", "@type", "@id", "schema:name", "schema:provider", "about"] },
            { "required": ["@context", "@type", "@id", "schema:name", "schema:provider", "gsp:hasGeometry"] },
            // { "required": ["@context", "@type", "@id", "name", "provider"] }
        ]
    });
    // Some JSON-LD for datasets (e.g. sciencebase) do not have about or gsp:hasGeometry yet are still valid as per SHACL shape
    #[cfg(feature = "local")]
    {
        dataset_schema
            .get_mut("anyOf")
            .and_then(serde_json::Value::as_array_mut)
            .expect("the schema literal above always defines an `anyOf` array")
            .push(
                json!({ "required": ["@context", "@type", "@id", "schema:name", "schema:provider"] }),
            );
    }
    dataset_schema
}
/// Returns the JSON Schema document describing a `Place` (location) JSON-LD object.
///
/// Every accepted alternative requires `@context`, `@type`, `@id`, `geo` and
/// `gsp:hasGeometry`, together with a name and provider given either as
/// `name` / `provider` or as `schema:name` / `schema:provider`.
pub fn get_location_schema() -> serde_json::Value {
    // Shape of the `provider` object.
    let provider_schema = json!({
        "type": "object",
        "properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
    });
    // The accepted sets of required keys; a document must satisfy at least one.
    let required_alternatives = json!([
        { "required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"] },
        { "required": ["@context", "@type", "@id", "schema:name", "schema:provider", "geo", "gsp:hasGeometry"] },
    ]);
    json!({
        "type": "object",
        "properties": {
            "@context": {"type": ["string", "object"]},
            "@type": {"type": ["string", "array"], "contains": {"const": "Place"}},
            "@id": {"type": "string"},
            "name": {"type": "string"},
            "provider": provider_schema,
            "geo": {"type": "object"},
            "gsp:hasGeometry": {"type": "object"}
        },
        "anyOf": required_alternatives
    })
}