Skip to main content

自托管

让我们加载 SelfHostedEmbeddingsSelfHostedHuggingFaceEmbeddingsSelfHostedHuggingFaceInstructEmbeddings 类。

import runhouse as rh
from langchain_community.embeddings import (
SelfHostedEmbeddings,
SelfHostedHuggingFaceEmbeddings,
SelfHostedHuggingFaceInstructEmbeddings,
)
# 对于 GCP、Azure 或 Lambda 的按需 A100
gpu = rh.cluster(name="rh-a10x", instance_type="A100:1", use_spot=False)

# 对于 AWS 的按需 A10G(AWS 上没有单个 A100)
# gpu = rh.cluster(name='rh-a10x', instance_type='g5.2xlarge', provider='aws')

# 对于现有集群
# gpu = rh.cluster(ips=['<ip of the cluster>'],
# ssh_creds={'ssh_user': '...', 'ssh_private_key':'<path_to_key>'},
# name='my-cluster')
embeddings = SelfHostedHuggingFaceEmbeddings(hardware=gpu)
text = "This is a test document."
query_result = embeddings.embed_query(text)

对于 SelfHostedHuggingFaceInstructEmbeddings 也是如此:

embeddings = SelfHostedHuggingFaceInstructEmbeddings(hardware=gpu)

现在让我们使用自定义加载函数加载一个嵌入模型:

def get_pipeline():
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
pipeline,
)

model_id = "facebook/bart-base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
return pipeline("feature-extraction", model=model, tokenizer=tokenizer)


def inference_fn(pipeline, prompt):
# 返回模型的最后隐藏状态
if isinstance(prompt, list):
return [emb[0][-1] for emb in pipeline(prompt)]
return pipeline(prompt)[0][-1]
embeddings = SelfHostedEmbeddings(
model_load_fn=get_pipeline,
hardware=gpu,
model_reqs=["./", "torch", "transformers"],
inference_fn=inference_fn,
)
query_result = embeddings.embed_query(text)

相关


此页面是否有帮助?


您还可以留下详细的反馈 在 GitHub 上