Skip to content
Open
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,26 @@ foundry --help
foundry mcp install # Add to Claude Code
```

## Export to HuggingFace Hub

Share datasets with the broader ML community:

```bash
pip install "foundry-ml[huggingface]"
foundry push-to-hf 10.18126/abc123 --repo your-username/dataset-name
```

Or programmatically:

```python
from foundry import Foundry
from foundry.integrations.huggingface import push_to_hub

f = Foundry()
dataset = f.get_dataset("10.18126/abc123")
push_to_hub(dataset, repo_id="your-username/dataset-name")
```

Exports include auto-generated Dataset Cards with full metadata, citations, and field documentation.

## Documentation

- [Getting Started](https://ai-materials-and-chemistry.gitbook.io/foundry/quickstart)
Expand Down
187 changes: 122 additions & 65 deletions examples/00_hello_foundry/hello_foundry.ipynb

Large diffs are not rendered by default.

128 changes: 119 additions & 9 deletions examples/01_quickstart/quickstart.ipynb

Large diffs are not rendered by default.

112 changes: 104 additions & 8 deletions examples/02_working_with_data/working_with_data.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
{
"cells": [
{
"cell_type": "markdown",
"source": "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/MLMI2-CSSI/foundry/blob/main/examples/02_working_with_data/working_with_data.ipynb)\n\n---",
"metadata": {}
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -20,10 +25,47 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Collecting pyarrow>=16.1.0\n",
" Downloading pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (3.2 kB)\n",
"Downloading pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl (34.2 MB)\n",
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.2/34.2 MB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m:01\u001b[0m\n",
"\u001b[?25hInstalling collected packages: pyarrow\n",
" Attempting uninstall: pyarrow\n",
" Found existing installation: pyarrow 15.0.0\n",
" Uninstalling pyarrow-15.0.0:\n",
" Successfully uninstalled pyarrow-15.0.0\n",
"\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
"taipy-core 3.1.1 requires pandas<=2.2.0,>=1.3.5, but you have pandas 2.3.1 which is incompatible.\n",
"taipy-core 3.1.1 requires pyarrow<=15.0.0,>=14.0.2, but you have pyarrow 22.0.0 which is incompatible.\n",
"streamlit 1.36.0 requires packaging<25,>=20, but you have packaging 25.0 which is incompatible.\n",
"streamlit 1.36.0 requires rich<14,>=10.14.0, but you have rich 14.2.0 which is incompatible.\n",
"datasets 2.19.0 requires dill<0.3.9,>=0.3.0, but you have dill 0.3.9 which is incompatible.\u001b[0m\u001b[31m\n",
"\u001b[0mSuccessfully installed pyarrow-22.0.0\n"
]
}
],
"source": [
"!pip install --upgrade \"pyarrow>=16.1.0\""
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": "from foundry import Foundry\n\n# HTTPS download is now the default\nf = Foundry()"
"source": [
"from foundry import Foundry\n",
"\n",
"# HTTPS download is now the default\n",
"f = Foundry()"
]
},
{
"cell_type": "markdown",
Expand All @@ -39,21 +81,49 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Get a dataset\nresults = f.search(\"band gap\", limit=1)\ndataset = results.iloc[0].FoundryDataset\n\n# Get the schema\nschema = dataset.get_schema()\n\nprint(f\"Dataset: {schema['name']}\")\nprint(f\"Title: {schema['title']}\")\nprint(f\"DOI: {schema['doi']}\")\nprint(f\"Data Type: {schema['data_type']}\")"
"source": [
"# Get a dataset\n",
"results = f.search(\"band gap\", limit=1)\n",
"dataset = results.iloc[0].FoundryDataset\n",
"\n",
"# Get the schema\n",
"schema = dataset.get_schema()\n",
"\n",
"print(f\"Dataset: {schema['name']}\")\n",
"print(f\"Title: {schema['title']}\")\n",
"print(f\"DOI: {schema['doi']}\")\n",
"print(f\"Data Type: {schema['data_type']}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Examine fields (columns)\nprint(\"Fields:\")\nprint(\"-\" * 60)\nfor field in schema['fields']:\n role = field['role'] # 'input' or 'target'\n name = field['name']\n desc = field['description'] or 'No description'\n units = field['units'] or ''\n print(f\" [{role:6}] {name}: {desc} {f'({units})' if units else ''}\")"
"source": [
"# Examine fields (columns)\n",
"print(\"Fields:\")\n",
"print(\"-\" * 60)\n",
"for field in schema['fields']:\n",
" role = field['role'] # 'input' or 'target'\n",
" name = field['name']\n",
" desc = field['description'] or 'No description'\n",
" units = field['units'] or ''\n",
" print(f\" [{role:6}] {name}: {desc} {f'({units})' if units else ''}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Examine splits (train/test/validation)\nprint(\"Splits:\")\nprint(\"-\" * 60)\nfor split in schema['splits']:\n print(f\" - {split['name']}: {split.get('type', 'data')}\")"
"source": [
"# Examine splits (train/test/validation)\n",
"print(\"Splits:\")\n",
"print(\"-\" * 60)\n",
"for split in schema['splits']:\n",
" print(f\" - {split['name']}: {split.get('type', 'data')}\")"
]
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -117,7 +187,14 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Get data with schema attached\nresult = dataset.get_as_dict(include_schema=True)\n\nprint(f\"Result keys: {result.keys()}\")\nprint(f\"\\nSchema name: {result['schema']['name']}\")\nprint(f\"Data splits: {list(result['data'].keys())}\")"
"source": [
"# Get data with schema attached\n",
"result = dataset.get_as_dict(include_schema=True)\n",
"\n",
"print(f\"Result keys: {result.keys()}\")\n",
"print(f\"\\nSchema name: {result['schema']['name']}\")\n",
"print(f\"Data splits: {list(result['data'].keys())}\")"
]
},
{
"cell_type": "markdown",
Expand Down Expand Up @@ -193,14 +270,33 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# Search with JSON output\n# as_json=True returns a list of dicts instead of a DataFrame\nresults_json = f.search(\"band gap\", limit=3, as_json=True)\n\nprint(f\"Type: {type(results_json)}\")\nprint(f\"Number of results: {len(results_json)}\")\n\nfor ds in results_json:\n print(f\"\\n- {ds['name']}\")\n print(f\" Title: {ds['title']}\")\n print(f\" DOI: {ds['doi']}\")\n print(f\" Fields: {ds.get('fields', [])}\")"
"source": [
"# Search with JSON output\n",
"# as_json=True returns a list of dicts instead of a DataFrame\n",
"results_json = f.search(\"band gap\", limit=3, as_json=True)\n",
"\n",
"print(f\"Type: {type(results_json)}\")\n",
"print(f\"Number of results: {len(results_json)}\")\n",
"\n",
"for ds in results_json:\n",
" print(f\"\\n- {ds['name']}\")\n",
" print(f\" Title: {ds['title']}\")\n",
" print(f\" DOI: {ds['doi']}\")\n",
" print(f\" Fields: {ds.get('fields', [])}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": "# List all datasets as JSON\nimport json\n\nall_datasets = f.list(limit=5, as_json=True)\nprint(json.dumps(all_datasets[0], indent=2))"
"source": [
"# List all datasets as JSON\n",
"import json\n",
"\n",
"all_datasets = f.list(limit=5, as_json=True)\n",
"print(json.dumps(all_datasets[0], indent=2))"
]
},
{
"cell_type": "markdown",
Expand Down
5 changes: 5 additions & 0 deletions examples/03_advanced_workflows/advanced_workflows.ipynb
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
{
"cells": [
{
"cell_type": "markdown",
"source": "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/MLMI2-CSSI/foundry/blob/main/examples/03_advanced_workflows/advanced_workflows.ipynb)\n\n---",
"metadata": {}
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
57 changes: 19 additions & 38 deletions examples/atom-position-finding/atom_position_finding.ipynb

Large diffs are not rendered by default.

Loading
Loading