Sentence Similarity
sentence-transformers
PyTorch
Transformers
English
t5
text-embedding
embeddings
information-retrieval
beir
text-classification
language-model
text-clustering
text-semantic-similarity
text-evaluation
prompt-retrieval
text-reranking
feature-extraction
English
Sentence Similarity
natural_questions
ms_marco
fever
hotpot_qa
mteb
Eval Results (legacy)
Instructions to use baseplate/instructor-large-1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use baseplate/instructor-large-1 with sentence-transformers:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("baseplate/instructor-large-1")

sentences = [
    "That is a happy person",
    "That is a happy dog",
    "That is a very happy person",
    "Today is a sunny day"
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [4, 4]
- Transformers
How to use baseplate/instructor-large-1 with Transformers:
# Load model directly
from transformers import AutoTokenizer, AutoModel

tokenizer = AutoTokenizer.from_pretrained("baseplate/instructor-large-1")
model = AutoModel.from_pretrained("baseplate/instructor-large-1")
- Notebooks
- Google Colab
- Kaggle
| from typing import Dict, List, Any | |
| from InstructorEmbedding import INSTRUCTOR | |
| import torch | |
class EndpointHandler():
    """Request handler that embeds texts with an INSTRUCTOR model.

    The model is loaded once at construction time; each call embeds the
    texts of one request payload, pairing every text with the payload's
    instruction.
    """

    def __init__(self, path=""):
        """Load the INSTRUCTOR model from ``path`` and pick a device.

        Args:
            path: Location handed straight to ``INSTRUCTOR(...)``.
        """
        model = INSTRUCTOR(path)
        self.model = model
        # Only move the model explicitly when CUDA is present; on CPU the
        # model is left wherever INSTRUCTOR() placed it.
        if torch.cuda.is_available():
            self.device = torch.device("cuda")
            self.model.to(self.device)
        else:
            self.device = torch.device("cpu")

    def __call__(self, data: Dict[str, Any]) -> List[Any]:
        """Embed the texts in ``data`` using the instruction in ``data``.

        Args:
            data: Request payload. Must contain:
                ``"instruction"``: the instruction paired with every text.
                ``"text"``: a list of texts, or a single string (treated
                as a one-element list). Both keys are popped from ``data``.

        Returns:
            ``self.model.encode(...).tolist()`` -- presumably one embedding
            vector per input text (TODO confirm against INSTRUCTOR.encode).

        Raises:
            ValueError: If ``"instruction"`` or ``"text"`` is missing.
        """
        # Validate before popping so a rejected payload is left untouched.
        # Previously a missing key silently fell back to the whole payload
        # dict, which was then fed to the model as if it were text.
        missing = [key for key in ("instruction", "text") if key not in data]
        if missing:
            raise ValueError(
                "request payload is missing required key(s): "
                + ", ".join(missing)
            )

        instruction = data.pop("instruction")
        text = data.pop("text")

        # A bare string would otherwise be iterated character by character.
        if isinstance(text, str):
            text = [text]

        # NOTE(review): pairs are built as [text, instruction]. The upstream
        # INSTRUCTOR examples appear to use [instruction, text] -- confirm
        # before changing, since swapping alters every returned embedding.
        inputs = [[s, instruction] for s in text]
        embeddings = self.model.encode(inputs)
        return embeddings.tolist()