Same sentence, different encoding! #141
```python
import sys
sys.path.append('../')
import os
import math

import numpy as np
import torch

from infersent_model import InferSent

EPS = 1e-4


def cosine(u, v):
    """Cosine similarity between two 1-D vectors."""
    return np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))


if __name__ == '__main__':
    # opt = myopts.parse_opt()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model_version = 1
    MODEL_PATH = './encoder'
    assert MODEL_PATH is not None, '--infersent_model_path is None!'
    MODEL_PATH = os.path.join(MODEL_PATH, 'infersent%s.pkl' % model_version)

    params_model = {
        'bsize': 64,
        'word_emb_dim': 300,
        'enc_lstm_dim': 2048,
        'pool_type': 'max',
        'dpout_model': 0.0,
        'version': model_version,
    }
    infersent_model = InferSent(params_model)
    infersent_model.load_state_dict(torch.load(MODEL_PATH))
    infersent_model = infersent_model.to(device)

    W2V_PATH = './Glove/glove.840B.300d.txt'
    assert W2V_PATH is not None, '--w2v_path is None!'
    infersent_model.set_w2v_path(W2V_PATH)
    infersent_model.build_vocab_k_words(K=100000)

    store = ['a man is talking about a movie pictures of a movie pictures',
             'a person is folding paper',
             'a man is singing',
             'people are dancing and dancing',
             'a man and woman are talking about something',
             'a woman is applying makeup',
             'a person is cooking a dish and adding ingredients into a pot',
             'a man is talking',
             'a man is talking about the weather on the screen',
             'cartoon characters are interacting']

    # Encode all sentences together in one batch.
    embeddings = infersent_model.encode(store, bsize=128, tokenize=True)

    for i in range(len(store)):
        # Encode the i-th sentence alone.
        temp = infersent_model.encode([store[i]], bsize=128, tokenize=True)[0]
        # Compare the two embeddings of the same sentence: encoded alone
        # vs. encoded together with the other sentences.
        if math.fabs(1 - cosine(temp, embeddings[i])) > EPS:
            print(cosine(temp, embeddings[i]))
```
And here is the output:

```
Vocab size : 100000
0.9066778
0.87379414
0.89509517
0.9344797
0.9010086
0.8247624
0.9670602
0.9080478
```

Really weird, isn't it? All of the model's parameters are frozen during encoding, so how can the same sentence end up with a noticeably different embedding depending on which batch it is encoded in?
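For anyone trying to narrow this down, here is a minimal diagnostic sketch (reusing `infersent_model`, `store`, and `cosine` from the script above). It separates two possible causes: nondeterminism in the encoder itself versus dependence on which other sentences share the batch. The expected outcomes in the comments are assumptions to test, not claims:

```python
# Hedged diagnostic sketch: reuses `infersent_model`, `store`, and
# `cosine` from the script above.
infersent_model.eval()  # cheap first check; rules out any train-mode layers

# (a) Encode the same short sentence twice in identical one-element batches.
#     If these two disagree, the encoder itself is nondeterministic.
a = infersent_model.encode([store[7]], bsize=128, tokenize=True)[0]
b = infersent_model.encode([store[7]], bsize=128, tokenize=True)[0]
print('same sentence, same batch:', cosine(a, b))  # expect ~1.0

# (b) Encode it again next to a much longer sentence, so it gets padded
#     inside the batch. encode() returns embeddings in input order, so
#     index 0 is still store[7].
c = infersent_model.encode([store[7], store[0]], bsize=128, tokenize=True)[0]
print('same sentence, mixed batch:', cosine(a, c))
```

If (a) stays at 1.0 while (b) drops, the weights really are frozen and the discrepancy comes from how `encode` sorts and pads sentences within a batch, not from the parameters.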