-
Notifications
You must be signed in to change notification settings - Fork 22
Open
Description
model = FastFit.from_pretrained("fast-fit")
model
gives
FastFit(
(encoder): MPNetModel(
(embeddings): MPNetEmbeddings(
(word_embeddings): Embedding(30527, 768, padding_idx=1)
(position_embeddings): Embedding(514, 768, padding_idx=1)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(encoder): MPNetEncoder(
(layer): ModuleList(
(0-11): 12 x MPNetLayer(
(attention): MPNetAttention(
(attn): MPNetSelfAttention(
(q): Linear(in_features=768, out_features=768, bias=True)
(k): Linear(in_features=768, out_features=768, bias=True)
(v): Linear(in_features=768, out_features=768, bias=True)
(o): Linear(in_features=768, out_features=768, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
(intermediate): MPNetIntermediate(
(dense): Linear(in_features=768, out_features=3072, bias=True)
(intermediate_act_fn): GELUActivation()
)
(output): MPNetOutput(
(dense): Linear(in_features=3072, out_features=768, bias=True)
(LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
(dropout): Dropout(p=0.1, inplace=False)
)
)
)
(relative_attention_bias): Embedding(32, 12)
)
(pooler): MPNetPooler(
(dense): Linear(in_features=768, out_features=768, bias=True)
(activation): Tanh()
)
)
(projection): Linear(in_features=768, out_features=128, bias=False)
(clf): Linear(in_features=768, out_features=17999, bias=True)
(dropout): Dropout(p=0.1, inplace=False)
(batch_norm): BatchNorm1d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(clf_criterion): CrossEntropyLoss()
(sim_criterion): SupConLoss()
(all_docs): ParameterList(
(0): Parameter containing: [torch.int64 of size 17999x10]
(1): Parameter containing: [torch.int64 of size 17999x10]
)
)
plison, alexandergrote and philmas
Metadata
Metadata
Assignees
Labels
No labels