mirror of
https://github.com/dathere/ckan_geoconnex_bulk_runner.git
synced 2026-07-05 23:22:20 +00:00
feat: enhanced cargo workspace, NM usage, Dockerfile
This commit is contained in:
parent
71b08a53f0
commit
3a79fb2b0a
18 changed files with 362 additions and 2478 deletions
77
geoconnex_utils/src/jsonld.rs
Normal file
77
geoconnex_utils/src/jsonld.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
use anyhow::{Result, bail};
|
||||
use serde_json::json;
|
||||
|
||||
pub fn construct_dataset_jsonld_from_metadata(
|
||||
dataset_metadata: serde_json::Value,
|
||||
) -> Result<serde_json::Value> {
|
||||
let dataset_id = dataset_metadata.get("id").unwrap().as_str().unwrap();
|
||||
eprintln!("Attempting to construct JSON-LD for dataset {dataset_id}");
|
||||
let dataset_title = dataset_metadata.get("title").unwrap().as_str().unwrap();
|
||||
let organization_name = dataset_metadata
|
||||
.get("organization")
|
||||
.unwrap()
|
||||
.get("title")
|
||||
.unwrap();
|
||||
// TODO: Align and include Geoconnex PIDs for reference feature categories to extract PIDs from them
|
||||
// Then also convert spatial_full FeatureCollection to Multipolygon if needed for gsp:hasGeometry when there are
|
||||
// also non-reference feature polygons
|
||||
let mut about = vec![];
|
||||
if let Some(spatial_full) = dataset_metadata.get("spatial_full") {
|
||||
let Some(spatial_full_str) = spatial_full.as_str() else {
|
||||
bail!("Could not parse spatial_full as string.");
|
||||
};
|
||||
if !spatial_full_str.is_empty() {
|
||||
let Ok(spatial_full_json) = serde_json::from_str::<serde_json::Value>(spatial_full_str)
|
||||
else {
|
||||
bail!(
|
||||
"Error while attempting to deserialize spatial_full string to serde_json::Value."
|
||||
);
|
||||
};
|
||||
let Some(features_value) = spatial_full_json.get("features") else {
|
||||
bail!("Error while attempting to get value of features from spatial_full GeoJSON.");
|
||||
};
|
||||
let Some(features) = features_value.as_array() else {
|
||||
bail!(
|
||||
"Error while attempting to take features value as array from spatial_full GeoJSON."
|
||||
);
|
||||
};
|
||||
for feature in features {
|
||||
let Some(properties) = feature.get("properties") else {
|
||||
bail!(
|
||||
"Error while attempting to get properties from features from spatial_full GeoJSON."
|
||||
);
|
||||
};
|
||||
if let Some(pid) = properties.get("pid") {
|
||||
let Some(pid_string) = pid.as_str() else {
|
||||
bail!("Error while attempting to convert PID as str from &Value.");
|
||||
};
|
||||
about.push(json!({
|
||||
"@id": pid_string,
|
||||
"@type": "Place"
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut jsonld = json!({
|
||||
"@context": {
|
||||
"@vocab": "https://schema.org/",
|
||||
"gsp": "http://www.opengis.net/ont/geosparql#",
|
||||
},
|
||||
"@type": "Dataset",
|
||||
// TODO: Customize namespace based on CKAN instance being used
|
||||
"@id": format!("https://geoconnex.us/ckan/sandbox/{dataset_id}"),
|
||||
"name": dataset_title,
|
||||
"provider": {
|
||||
"@type": "Organization",
|
||||
"name": organization_name
|
||||
},
|
||||
// TODO: Customize CKAN instance URL based on CKAN instance being used
|
||||
"url": format!("https://sandbox.opendataportal.us/dataset/{dataset_id}")
|
||||
});
|
||||
let jsonld_map = jsonld.as_object_mut().unwrap();
|
||||
if about.len() > 0 {
|
||||
jsonld_map.insert("about".to_string(), serde_json::to_value(about).unwrap());
|
||||
}
|
||||
Ok(serde_json::to_value(jsonld_map).unwrap())
|
||||
}
|
||||
2
geoconnex_utils/src/lib.rs
Normal file
2
geoconnex_utils/src/lib.rs
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
pub mod jsonld;
|
||||
pub mod schema;
|
||||
76
geoconnex_utils/src/schema.rs
Normal file
76
geoconnex_utils/src/schema.rs
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
use serde_json::json;
|
||||
|
||||
pub fn get_dataset_schema() -> serde_json::Value {
|
||||
// Allow for "local" feature
|
||||
#[allow(unused_mut)]
|
||||
let mut dataset_schema = json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@context": {"type": ["string", "object"]},
|
||||
"@type": {"type": ["string", "array"], "contains": {"anyOf": [{"const": "Dataset"}, {"const": "schema:Dataset"}]}},
|
||||
"@id": {"type": "string"},
|
||||
"name": {"type": "string"},
|
||||
"schema:name": {"type": "string"},
|
||||
"provider": {
|
||||
"type": "object",
|
||||
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||
},
|
||||
"schema:provider": {
|
||||
"type": "object",
|
||||
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||
},
|
||||
"gsp:hasGeometry": {"@type": "object"},
|
||||
"about": {
|
||||
"type": ["string", "array"],
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {"@id": {"type": "string"}, "@type": {"const": "Place"}},
|
||||
},
|
||||
"minItems": 1
|
||||
},
|
||||
},
|
||||
"anyOf": [
|
||||
{ "required": ["@context", "@type", "@id", "name", "provider", "about"] },
|
||||
{ "required": ["@context", "@type", "@id", "name", "provider", "gsp:hasGeometry"] },
|
||||
{ "required": ["@context", "@type", "@id", "schema:name", "schema:provider", "about"] },
|
||||
{ "required": ["@context", "@type", "@id", "schema:name", "schema:provider", "gsp:hasGeometry"] },
|
||||
// { "required": ["@context", "@type", "@id", "name", "provider"] }
|
||||
]
|
||||
});
|
||||
// Some JSON-LD for datasets (e.g. sciencebase) do not have about or gsp:hasGeometry yet are still valid as per SHACL shape
|
||||
#[cfg(feature = "local")]
|
||||
{
|
||||
let required_array = dataset_schema
|
||||
.get_mut("anyOf")
|
||||
.unwrap()
|
||||
.as_array_mut()
|
||||
.unwrap();
|
||||
required_array.insert(
|
||||
required_array.len(),
|
||||
json!({ "required": ["@context", "@type", "@id", "schema:name", "schema:provider"] }),
|
||||
);
|
||||
}
|
||||
dataset_schema
|
||||
}
|
||||
|
||||
pub fn get_location_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"@context": {"type": ["string", "object"]},
|
||||
"@type": {"type": ["string", "array"], "contains": {"const": "Place"}},
|
||||
"@id": {"type": "string"},
|
||||
"name": {"type": "string"},
|
||||
"provider": {
|
||||
"type": "object",
|
||||
"properties": {"@type": {"type": "string"}, "name": {"type": "string"}},
|
||||
},
|
||||
"geo": {"type": "object"},
|
||||
"gsp:hasGeometry": {"type": "object"}
|
||||
},
|
||||
"anyOf": [
|
||||
{ "required": ["@context", "@type", "@id", "name", "provider", "geo", "gsp:hasGeometry"] },
|
||||
{ "required": ["@context", "@type", "@id", "schema:name", "schema:provider", "geo", "gsp:hasGeometry"] },
|
||||
]
|
||||
})
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue