1
from sklearn.metrics.pairwise import linear_kernel
sim_matrix = linear_kernel(tfidf_matrix, tfidf_matrix)

When I try to compute the dot product, I get this error:

MemoryError                               Traceback (most recent call last)
<ipython-input-19-2c4d43d4a89e> in <module>
      1 from sklearn.metrics.pairwise import linear_kernel
----> 2 sim_matrix = linear_kernel(tfidf_matrix, tfidf_matrix)

~\anaconda3\lib\site-packages\sklearn\metrics\pairwise.py in linear_kernel(X, Y, dense_output)
   1002     """
   1003     X, Y = check_pairwise_arrays(X, Y)
-> 1004     return safe_sparse_dot(X, Y.T, dense_output=dense_output)
   1005
   1006

~\anaconda3\lib\site-packages\sklearn\utils\validation.py in inner_f(*args, **kwargs)
     70                           FutureWarning)
     71         kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 72         return f(**kwargs)
     73     return inner_f
     74

~\anaconda3\lib\site-packages\sklearn\utils\extmath.py in safe_sparse_dot(a, b, dense_output)
    151             ret = np.dot(a, b)
    152     else:
--> 153         ret = a @ b
    154
    155     if (sparse.issparse(a) and sparse.issparse(b)

~\anaconda3\lib\site-packages\scipy\sparse\base.py in __matmul__(self, other)
    558             raise ValueError("Scalar operands are not allowed, "
    559                              "use '*' instead")
--> 560         return self.__mul__(other)
    561
    562     def __rmatmul__(self, other):

~\anaconda3\lib\site-packages\scipy\sparse\base.py in __mul__(self, other)
    478             if self.shape[1] != other.shape[0]:
    479                 raise ValueError('dimension mismatch')
--> 480             return self._mul_sparse_matrix(other)
    481
    482         # If it's a list or whatever, treat it like a matrix

~\anaconda3\lib\site-packages\scipy\sparse\compressed.py in _mul_sparse_matrix(self, other)
    514
    515         indptr = np.empty(major_axis + 1, dtype=idx_dtype)
--> 516         indices = np.empty(nnz, dtype=idx_dtype)
    517         data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))
    518

MemoryError: Unable to allocate 2.04 GiB for an array with shape (546860044,) and data type int32

Nikos M.
  • 2,493
  • 1
  • 7
  • 11

0 Answers0