diff --git a/server/routes/browser/html.py b/server/routes/browser/html.py
index 8dcabd3fc9..2147406609 100644
--- a/server/routes/browser/html.py
+++ b/server/routes/browser/html.py
@@ -24,6 +24,7 @@
import server.lib.render as lib_render
import server.lib.shared as shared_api
+import server.services.datacommons as dc
bp = Blueprint('browser', __name__, url_prefix='/browser')
@@ -47,7 +48,117 @@ def browser_node(dcid):
node_name = api_name
except Exception as e:
logging.info(e)
+
+ json_ld_data = {}
+ # Provenance nodes start with dc/base/
+ if dcid.startswith("dc/base/"):
+ json_ld_data = {
+ "@context": {
+ "@language": "en",
+ "@vocab": "https://schema.org/",
+ "sc": "https://schema.org/",
+ "cr": "http://mlcommons.org/croissant/",
+ "rai": "http://mlcommons.org/croissant/RAI/",
+ "dct": "http://purl.org/dc/terms/",
+ "citeAs": "cr:citeAs",
+ "column": "cr:column",
+ "conformsTo": "dct:conformsTo",
+ "data": {
+ "@id": "cr:data",
+ "@type": "@json"
+ },
+ "dataType": {
+ "@id": "cr:dataType",
+ "@type": "@vocab"
+ },
+ "examples": {
+ "@id": "cr:examples",
+ "@type": "@json"
+ },
+ "extract": "cr:extract",
+ "field": "cr:field",
+ "fileProperty": "cr:fileProperty",
+ "fileObject": "cr:fileObject",
+ "fileSet": "cr:fileSet",
+ "format": "cr:format",
+ "includes": "cr:includes",
+ "isLiveDataset": "cr:isLiveDataset",
+ "jsonPath": "cr:jsonPath",
+ "key": "cr:key",
+ "md5": "cr:md5",
+ "parentField": "cr:parentField",
+ "path": "cr:path",
+ "recordSet": "cr:recordSet",
+ "references": "cr:references",
+ "regex": "cr:regex",
+ "repeated": "cr:repeated",
+ "replace": "cr:replace",
+ "separator": "cr:separator",
+ "source": "cr:source",
+ "subField": "cr:subField",
+ "transform": "cr:transform"
+ },
+ "@type":
+ "Dataset",
+ "conformsTo":
+ "http://mlcommons.org/croissant/1.0",
+ "description":
+ f"This dataset contains all the data related to provenance {dcid}",
+ "url":
+ f"https://datacommons.org/browser/{dcid}",
+ "publisher": {
+ "@type": "Organization",
+ "name": "Data Commons",
+ "url": "https://datacommons.org"
+ }
+ }
+
+ try:
+ resp = dc.v2node([dcid], "->*")
+ data = resp.get("data", {}).get(dcid, {})
+ arcs = data.get("arcs", {})
+
+ # Extract the name from the isPartOf node (the Dataset)
+ is_part_of_nodes = arcs.get("isPartOf", {}).get("nodes", [])
+ if is_part_of_nodes and "name" in is_part_of_nodes[0]:
+ json_ld_data["name"] = is_part_of_nodes[0]["name"]
+
+ if "description" in arcs and arcs["description"].get("nodes"):
+ json_ld_data["description"] = arcs["description"]["nodes"][0].get(
+ "value", json_ld_data["description"])
+
+ # Fetch license
+ if "license" in arcs and arcs["license"].get("nodes"):
+ json_ld_data["license"] = arcs["license"]["nodes"][0].get("value")
+
+ # Fetch source
+ source_nodes = arcs.get("source", {}).get("nodes", [])
+
+ if source_nodes:
+ s_node = source_nodes[0]
+ s_name = s_node.get("name", s_node.get("value", ""))
+ s_dcid = s_node.get("dcid", "")
+
+ source_obj = {"@type": "Organization", "name": s_name}
+
+ # Fetch its external URL
+ if s_dcid:
+ try:
+ s_resp = dc.v2node([s_dcid], "->url")
+ s_url_nodes = s_resp.get("data", {}).get(s_dcid, {}).get(
+ "arcs", {}).get("url", {}).get("nodes", [])
+ if s_url_nodes:
+ source_obj["url"] = s_url_nodes[0].get("value")
+ except Exception as e:
+ logging.error("Error fetching source URL for %s: %s", s_dcid, e)
+
+ json_ld_data["creator"] = [source_obj]
+
+ except Exception as e:
+ logging.error("Error fetching metadata for %s: %s", dcid, e)
+
return render_template('/browser/node.html',
dcid=dcid,
node_name=node_name,
+ json_ld_data=json_ld_data,
maps_api_key=current_app.config['MAPS_API_KEY'])
diff --git a/server/templates/browser/node.html b/server/templates/browser/node.html
index 13737eca4e..7248c4c8da 100644
--- a/server/templates/browser/node.html
+++ b/server/templates/browser/node.html
@@ -21,6 +21,11 @@
{% set title = node_name + ' - Knowledge Graph' %}
{% block head %}
+ {% if json_ld_data %}
+
+ {% endif %}
{% endblock %}