MLMI2-CSSI · blaiszik · Jan 14, 2026 · Jan 14, 2026 · Jan 14, 2026 · Jan 14, 2026
diff --git a/README.md b/README.md
@@ -77,6 +77,26 @@ foundry --help
 foundry mcp install  # Add to Claude Code
 ```
 
+## Export to HuggingFace Hub
+
+Share datasets with the broader ML community:
+
+```bash
+pip install "foundry-ml[huggingface]"
+foundry push-to-hf 10.18126/abc123 --repo your-username/dataset-name
+```
+
+Or programmatically:
+
+```python
+from foundry import Foundry
+from foundry.integrations.huggingface import push_to_hub
+dataset = Foundry().get_dataset("10.18126/abc123")
+push_to_hub(dataset, repo_id="your-username/dataset-name")
+```
+
+Exports include auto-generated Dataset Cards with full metadata, citations, and field documentation.
+
 ## Documentation
 
 - [Getting Started](https://ai-materials-and-chemistry.gitbook.io/foundry/quickstart)

diff --git a/examples/00_hello_foundry/hello_foundry.ipynb b/examples/00_hello_foundry/hello_foundry.ipynb
diff --git a/examples/01_quickstart/quickstart.ipynb b/examples/01_quickstart/quickstart.ipynb
diff --git a/examples/02_working_with_data/working_with_data.ipynb b/examples/02_working_with_data/working_with_data.ipynb
@@ -1,5 +1,10 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "source": "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/MLMI2-CSSI/foundry/blob/main/examples/02_working_with_data/working_with_data.ipynb)\n\n---",
+   "metadata": {}
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -20,10 +25,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting pyarrow>=16.1.0\n",
+      "  Downloading pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (3.2 kB)\n",
+      "Downloading pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl (34.2 MB)\n",
+      "\u001b[2K   \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.2/34.2 MB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m:01\u001b[0m\n",
+      "\u001b[?25hInstalling collected packages: pyarrow\n",
+      "  Attempting uninstall: pyarrow\n",
+      "    Found existing installation: pyarrow 15.0.0\n",
+      "    Uninstalling pyarrow-15.0.0:\n",
+      "      Successfully uninstalled pyarrow-15.0.0\n",
+      "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+      "taipy-core 3.1.1 requires pandas<=2.2.0,>=1.3.5, but you have pandas 2.3.1 which is incompatible.\n",
+      "taipy-core 3.1.1 requires pyarrow<=15.0.0,>=14.0.2, but you have pyarrow 22.0.0 which is incompatible.\n",
+      "streamlit 1.36.0 requires packaging<25,>=20, but you have packaging 25.0 which is incompatible.\n",
+      "streamlit 1.36.0 requires rich<14,>=10.14.0, but you have rich 14.2.0 which is incompatible.\n",
+      "datasets 2.19.0 requires dill<0.3.9,>=0.3.0, but you have dill 0.3.9 which is incompatible.\u001b[0m\u001b[31m\n",
+      "\u001b[0mSuccessfully installed pyarrow-22.0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install --upgrade \"pyarrow>=16.1.0\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
-   "source": "from foundry import Foundry\n\n# HTTPS download is now the default\nf = Foundry()"
+   "source": [
+    "from foundry import Foundry\n",
+    "\n",
+    "# HTTPS download is now the default\n",
+    "f = Foundry()"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -39,21 +81,49 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "# Get a dataset\nresults = f.search(\"band gap\", limit=1)\ndataset = results.iloc[0].FoundryDataset\n\n# Get the schema\nschema = dataset.get_schema()\n\nprint(f\"Dataset: {schema['name']}\")\nprint(f\"Title: {schema['title']}\")\nprint(f\"DOI: {schema['doi']}\")\nprint(f\"Data Type: {schema['data_type']}\")"
+   "source": [
+    "# Get a dataset\n",
+    "results = f.search(\"band gap\", limit=1)\n",
+    "dataset = results.iloc[0].FoundryDataset\n",
+    "\n",
+    "# Get the schema\n",
+    "schema = dataset.get_schema()\n",
+    "\n",
+    "print(f\"Dataset: {schema['name']}\")\n",
+    "print(f\"Title: {schema['title']}\")\n",
+    "print(f\"DOI: {schema['doi']}\")\n",
+    "print(f\"Data Type: {schema['data_type']}\")"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "# Examine fields (columns)\nprint(\"Fields:\")\nprint(\"-\" * 60)\nfor field in schema['fields']:\n    role = field['role']  # 'input' or 'target'\n    name = field['name']\n    desc = field['description'] or 'No description'\n    units = field['units'] or ''\n    print(f\"  [{role:6}] {name}: {desc} {f'({units})' if units else ''}\")"
+   "source": [
+    "# Examine fields (columns)\n",
+    "print(\"Fields:\")\n",
+    "print(\"-\" * 60)\n",
+    "for field in schema['fields']:\n",
+    "    role = field['role']  # 'input' or 'target'\n",
+    "    name = field['name']\n",
+    "    desc = field['description'] or 'No description'\n",
+    "    units = field['units'] or ''\n",
+    "    print(f\"  [{role:6}] {name}: {desc} {f'({units})' if units else ''}\")"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "# Examine splits (train/test/validation)\nprint(\"Splits:\")\nprint(\"-\" * 60)\nfor split in schema['splits']:\n    print(f\"  - {split['name']}: {split.get('type', 'data')}\")"
+   "source": [
+    "# Examine splits (train/test/validation)\n",
+    "print(\"Splits:\")\n",
+    "print(\"-\" * 60)\n",
+    "for split in schema['splits']:\n",
+    "    print(f\"  - {split['name']}: {split.get('type', 'data')}\")"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -117,7 +187,14 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "# Get data with schema attached\nresult = dataset.get_as_dict(include_schema=True)\n\nprint(f\"Result keys: {result.keys()}\")\nprint(f\"\\nSchema name: {result['schema']['name']}\")\nprint(f\"Data splits: {list(result['data'].keys())}\")"
+   "source": [
+    "# Get data with schema attached\n",
+    "result = dataset.get_as_dict(include_schema=True)\n",
+    "\n",
+    "print(f\"Result keys: {result.keys()}\")\n",
+    "print(f\"\\nSchema name: {result['schema']['name']}\")\n",
+    "print(f\"Data splits: {list(result['data'].keys())}\")"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -193,14 +270,33 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "# Search with JSON output\n# as_json=True returns a list of dicts instead of a DataFrame\nresults_json = f.search(\"band gap\", limit=3, as_json=True)\n\nprint(f\"Type: {type(results_json)}\")\nprint(f\"Number of results: {len(results_json)}\")\n\nfor ds in results_json:\n    print(f\"\\n- {ds['name']}\")\n    print(f\"  Title: {ds['title']}\")\n    print(f\"  DOI: {ds['doi']}\")\n    print(f\"  Fields: {ds.get('fields', [])}\")"
+   "source": [
+    "# Search with JSON output\n",
+    "# as_json=True returns a list of dicts instead of a DataFrame\n",
+    "results_json = f.search(\"band gap\", limit=3, as_json=True)\n",
+    "\n",
+    "print(f\"Type: {type(results_json)}\")\n",
+    "print(f\"Number of results: {len(results_json)}\")\n",
+    "\n",
+    "for ds in results_json:\n",
+    "    print(f\"\\n- {ds['name']}\")\n",
+    "    print(f\"  Title: {ds['title']}\")\n",
+    "    print(f\"  DOI: {ds['doi']}\")\n",
+    "    print(f\"  Fields: {ds.get('fields', [])}\")"
+   ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": "# List all datasets as JSON\nimport json\n\nall_datasets = f.list(limit=5, as_json=True)\nprint(json.dumps(all_datasets[0], indent=2))"
+   "source": [
+    "# List all datasets as JSON\n",
+    "import json\n",
+    "\n",
+    "all_datasets = f.list(limit=5, as_json=True)\n",
+    "print(json.dumps(all_datasets[0], indent=2))"
+   ]
   },
   {
    "cell_type": "markdown",

diff --git a/examples/03_advanced_workflows/advanced_workflows.ipynb b/examples/03_advanced_workflows/advanced_workflows.ipynb
@@ -1,5 +1,10 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "source": "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/MLMI2-CSSI/foundry/blob/main/examples/03_advanced_workflows/advanced_workflows.ipynb)\n\n---",
+   "metadata": {}
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/examples/atom-position-finding/atom_position_finding.ipynb b/examples/atom-position-finding/atom_position_finding.ipynb