Instructions to use nvidia/E-RADIO with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use nvidia/E-RADIO with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="nvidia/E-RADIO", trust_remote_code=True)

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("nvidia/E-RADIO", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| # Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. | |
| # | |
| # NVIDIA CORPORATION and its licensors retain all intellectual property | |
| # and proprietary rights in and to this software, related documentation | |
| # and any modifications thereto. Any use, reproduction, disclosure or | |
| # distribution of this software and related documentation without an express | |
| # license agreement from NVIDIA CORPORATION is strictly prohibited. | |
| from typing import Union, Tuple | |
| from types import MethodType | |
| import torch | |
| from torch import nn | |
| from timm.models import VisionTransformer, checkpoint_seq | |
| from .vit_patch_generator import ViTPatchGenerator | |
def _forward_cpe(self: VisionTransformer, x: torch.Tensor) -> torch.Tensor:
    """Replacement ``forward_features`` for a model patched by ``enable_cpe``.

    Feeds the input through ``self.patch_generator``, then the transformer
    blocks, then the final norm layer, and returns the result.

    Args:
        self: The model this function is bound to as a method.
        x: Input tensor handed to ``self.patch_generator``.

    Returns:
        The output of ``self.norm`` applied to the block outputs.
    """
    tokens = self.patch_generator(x)

    # Gradient checkpointing is only used outside of TorchScript scripting.
    use_checkpointing = self.grad_checkpointing and not torch.jit.is_scripting()
    tokens = (
        checkpoint_seq(self.blocks, tokens)
        if use_checkpointing
        else self.blocks(tokens)
    )

    return self.norm(tokens)
def enable_cpe(model: nn.Module,
               max_img_size: Union[int, Tuple[int, int]] = 1024,
               num_cls_tokens: int = 1,
               pos_dropout: float = 0.1,
               register_multiple: int = 0,
):
    """Patch a timm ``VisionTransformer`` in place to use a ``ViTPatchGenerator``.

    A ``ViTPatchGenerator`` is built from the model's existing patch-embedding
    configuration and attached as ``model.patch_generator``. The model's own
    ``patch_embed``, ``cls_token``, ``pos_embed`` and ``pos_drop`` attributes
    are then set to ``None``, and ``forward_features`` is rebound to
    ``_forward_cpe`` so the forward pass goes through the new generator.

    Args:
        model: The model to modify. Must be a ``VisionTransformer``.
        max_img_size: Maximum supported image size, either a single int or a
            2-tuple. Each dimension is rounded to the nearest multiple of the
            model's patch size before being passed on as ``max_input_dims``.
        num_cls_tokens: Forwarded to ``ViTPatchGenerator`` and stored on the
            model as ``model.num_cls_tokens``.
        pos_dropout: Forwarded to ``ViTPatchGenerator`` as ``pos_dropout``.
        register_multiple: Forwarded unchanged to ``ViTPatchGenerator``.

    Returns:
        None. ``model`` is mutated in place.

    Raises:
        ValueError: If ``model`` is not a ``VisionTransformer``.
    """
    if not isinstance(model, VisionTransformer):
        raise ValueError("CPE only support for VisionTransformer models!")

    # Only the first entry of the patch size is used, i.e. the same patch
    # extent is applied to every image dimension below.
    patch_size = model.patch_embed.patch_size[0]
    embed_dim = model.embed_dim
    input_dims = model.patch_embed.img_size
    # Any patch-embed norm other than nn.Identity counts as "normalizing".
    normalize_patches = not isinstance(model.patch_embed.norm, nn.Identity)
    cls_token = model.cls_token is not None

    def _snap_to_patch_multiple(dim):
        # Round one image dimension to the nearest multiple of patch_size.
        return int(round(dim / patch_size) * patch_size)

    # The signature advertises a 2-tuple, but dividing a tuple by an int
    # raises TypeError, so sequences are rounded per dimension instead.
    # NOTE(review): assumes ViTPatchGenerator accepts a tuple for
    # max_input_dims, as the annotation on max_img_size implies -- confirm.
    if isinstance(max_img_size, (tuple, list)):
        max_img_size = tuple(_snap_to_patch_multiple(d) for d in max_img_size)
    else:
        max_img_size = _snap_to_patch_multiple(max_img_size)

    patch_generator = ViTPatchGenerator(
        patch_size=patch_size,
        embed_dim=embed_dim,
        input_dims=input_dims,
        normalize_patches=normalize_patches,
        cls_token=cls_token,
        max_input_dims=max_img_size,
        pos_dropout=pos_dropout,
        num_cls_tokens=num_cls_tokens,
        register_multiple=register_multiple,
    )

    # Attach the generator and clear the attributes it replaces.
    model.patch_generator = patch_generator
    model.patch_embed = None
    model.cls_token = None
    model.pos_embed = None
    model.pos_drop = None
    model.num_cls_tokens = num_cls_tokens
    model.num_registers = patch_generator.num_registers

    # Rebind forward_features on this instance only (not on the class).
    model.forward_features = MethodType(_forward_cpe, model)