TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType
Hello,
I am getting the error TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType whenever I change any of the default hyperparameters at the initialization of BERTopic(). For instance:
topic_model = BERTopic(language='french',
                       # top_n_words=10,
                       # min_topic_size=12,
                       # n_gram_range=(1, 3),
                       # calculate_probabilities=True,
                       verbose=True,
                       embedding_model='paraphrase-multilingual-MiniLM-L12-v2')
topics, probs = topic_model.fit_transform(list_docs)
gives:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Input In [26], in <cell line: 9>()
1 topic_model = BERTopic(language='french',#) #,
2 #top_n_words=10,
3 #min_topic_size=12,
(...)
6 verbose=True,
7 embedding_model='paraphrase-multilingual-MiniLM-L12-v2')
----> 9 topics, probs = topic_model.fit_transform(list_docs)
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/bertopic/_bertopic.py:284, in BERTopic.fit_transform(self, documents, embeddings, y)
282 # Extract embeddings
283 if embeddings is None:
--> 284 self.embedding_model = select_backend(self.embedding_model,
285 language=self.language)
286 embeddings = self._extract_embeddings(documents.Document,
287 method="document",
288 verbose=self.verbose)
289 logger.info("Transformed documents to Embeddings")
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/bertopic/backend/_utils.py:60, in select_backend(embedding_model, language)
58 # Create a Sentence Transformer model based on a string
59 if isinstance(embedding_model, str):
---> 60 return SentenceTransformerBackend(embedding_model)
62 # Select embedding model based on language
63 if language:
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/bertopic/backend/_sentencetransformers.py:43, in SentenceTransformerBackend.__init__(self, embedding_model)
41 self.embedding_model = embedding_model
42 elif isinstance(embedding_model, str):
---> 43 self.embedding_model = SentenceTransformer(embedding_model)
44 else:
45 raise ValueError("Please select a correct SentenceTransformers model: \n"
46 "`from sentence_transformers import SentenceTransformer` \n"
47 "`model = SentenceTransformer('all-MiniLM-L6-v2')`")
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/sentence_transformers/SentenceTransformer.py:115, in SentenceTransformer.__init__(self, model_name_or_path, modules, device)
113 for module_config in contained_modules:
114 module_class = import_from_string(module_config['type'])
--> 115 module = module_class.load(os.path.join(model_path, module_config['path']))
116 modules[module_config['name']] = module
119 if modules is not None and not isinstance(modules, OrderedDict):
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/sentence_transformers/models/Transformer.py:115, in Transformer.load(input_path)
113 with open(sbert_config_path) as fIn:
114 config = json.load(fIn)
--> 115 return Transformer(model_name_or_path=input_path, **config)
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/sentence_transformers/models/Transformer.py:31, in Transformer.__init__(self, model_name_or_path, max_seq_length, model_args, cache_dir, tokenizer_args, do_lower_case, tokenizer_name_or_path)
29 config = AutoConfig.from_pretrained(model_name_or_path, **model_args, cache_dir=cache_dir)
30 self.auto_model = AutoModel.from_pretrained(model_name_or_path, config=config, cache_dir=cache_dir)
---> 31 self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path if tokenizer_name_or_path is not None else model_name_or_path, cache_dir=cache_dir, **tokenizer_args)
33 if tokenizer_name_or_path is not None:
34 self.auto_model.config.tokenizer_class = self.tokenizer.__class__.__name__
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/transformers/tokenization_auto.py:341, in AutoTokenizer.from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
339 return tokenizer_class_fast.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
340 else:
--> 341 return tokenizer_class_py.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
343 raise ValueError(
344 "Unrecognized configuration class {} to build an AutoTokenizer.\n"
345 "Model type should be one of {}.".format(
346 config.__class__, ", ".join(c.__name__ for c in TOKENIZER_MAPPING.keys())
347 )
348 )
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1652, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
1649 else:
1650 logger.info("loading file {} from cache at {}".format(file_path, resolved_vocab_files[file_id]))
-> 1652 return cls._from_pretrained(
1653 resolved_vocab_files, pretrained_model_name_or_path, init_configuration, *init_inputs, **kwargs
1654 )
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/transformers/tokenization_utils_base.py:1725, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, *init_inputs, **kwargs)
1723 # Instantiate tokenizer.
1724 try:
-> 1725 tokenizer = cls(*init_inputs, **init_kwargs)
1726 except OSError:
1727 raise OSError(
1728 "Unable to load vocabulary from file. "
1729 "Please check that the provided vocabulary is accessible and not corrupted."
1730 )
File ~/anaconda3/envs/bertopic/lib/python3.8/site-packages/transformers/tokenization_bert.py:193, in BertTokenizer.__init__(self, vocab_file, do_lower_case, do_basic_tokenize, never_split, unk_token, sep_token, pad_token, cls_token, mask_token, tokenize_chinese_chars, strip_accents, **kwargs)
164 def __init__(
165 self,
166 vocab_file,
(...)
177 **kwargs
178 ):
179 super().__init__(
180 do_lower_case=do_lower_case,
181 do_basic_tokenize=do_basic_tokenize,
(...)
190 **kwargs,
191 )
--> 193 if not os.path.isfile(vocab_file):
194 raise ValueError(
195 "Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
196 "model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file)
197 )
198 self.vocab = load_vocab(vocab_file)
File ~/anaconda3/envs/bertopic/lib/python3.8/genericpath.py:30, in isfile(path)
28 """Test whether a path is a regular file"""
29 try:
---> 30 st = os.stat(path)
31 except (OSError, ValueError):
32 return False
TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType
The same error occurs when I specify only embedding_model='paraphrase-multilingual-MiniLM-L12-v2'. Everything works fine when the selected language is english or when no arguments are given.
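For reference, the traceback indicates that the failure originates inside SentenceTransformer(embedding_model): vocab_file is None by the time BertTokenizer calls os.path.isfile. A minimal sketch along these lines, run in the same environment, should reproduce the error without involving BERTopic at all:

# Hypothetical two-line repro: load the same model directly.
# If the environment is the culprit, this should raise the same TypeError.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')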
I am using BERTopic through a Jupyter notebook, and my environment is the following:
Gentoo Base System release 2.8
DISTRIB_ID="Gentoo"
NAME=Gentoo
ID=gentoo
PRETTY_NAME="Gentoo Linux"
ANSI_COLOR="1;32"
HOME_URL="https://www.gentoo.org/"
SUPPORT_URL="https://www.gentoo.org/support/"
BUG_REPORT_URL="https://bugs.gentoo.org/"
VERSION_ID="2.8"
# packages in environment at /u/salaunol/anaconda3/envs/bertopic:
#
# Name Version Build Channel
_libgcc_mutex 0.1 main
_openmp_mutex 4.5 1_gnu
absl-py 1.0.0 pypi_0 pypi
argon2-cffi 21.3.0 pyhd3eb1b0_0
argon2-cffi-bindings 21.2.0 py38h7f8727e_0
asttokens 2.0.5 pyhd3eb1b0_0
astunparse 1.6.3 pypi_0 pypi
attrs 21.4.0 pyhd3eb1b0_0
backcall 0.2.0 pyhd3eb1b0_0
beautifulsoup4 4.10.0 pypi_0 pypi
bertopic 0.9.4 pypi_0 pypi
blas 1.0 mkl
bleach 4.1.0 pyhd3eb1b0_0
blis 0.7.6 pypi_0 pypi
bpemb 0.3.3 pypi_0 pypi
ca-certificates 2022.2.1 h06a4308_0
cachetools 5.0.0 pypi_0 pypi
catalogue 2.0.6 pypi_0 pypi
certifi 2021.10.8 py38h06a4308_2
cffi 1.15.0 py38hd667e15_1
charset-normalizer 2.0.12 pypi_0 pypi
click 8.0.4 pypi_0 pypi
cloudpickle 2.0.0 pypi_0 pypi
cudatoolkit 11.0.221 h6bb024c_0
cycler 0.11.0 pypi_0 pypi
cymem 2.0.6 pypi_0 pypi
cython 0.29.28 pypi_0 pypi
dataclasses 0.6 pypi_0 pypi
dbus 1.13.18 hb2f20db_0
debugpy 1.5.1 py38h295c915_0
decorator 5.1.1 pyhd3eb1b0_0
defusedxml 0.7.1 pyhd3eb1b0_0
deprecated 1.2.13 pypi_0 pypi
entrypoints 0.3 py38_0
executing 0.8.3 pyhd3eb1b0_0
expat 2.4.4 h295c915_0
filelock 3.6.0 pypi_0 pypi
flair 0.7 pypi_0 pypi
flatbuffers 2.0 pypi_0 pypi
fontconfig 2.13.1 h6c09931_0
fonttools 4.31.1 pypi_0 pypi
freetype 2.11.0 h70c0345_0
ftfy 6.1.1 pypi_0 pypi
future 0.18.2 pypi_0 pypi
gast 0.5.3 pypi_0 pypi
gdown 4.4.0 pypi_0 pypi
gensim 3.8.3 pypi_0 pypi
giflib 5.2.1 h7b6447c_0
glib 2.69.1 h4ff587b_1
google-auth 2.6.2 pypi_0 pypi
google-auth-oauthlib 0.4.6 pypi_0 pypi
google-pasta 0.2.0 pypi_0 pypi
grpcio 1.44.0 pypi_0 pypi
gst-plugins-base 1.14.0 h8213a91_2
gstreamer 1.14.0 h28cd5cc_2
hdbscan 0.8.28 pypi_0 pypi
huggingface-hub 0.4.0 pypi_0 pypi
hyperopt 0.2.7 pypi_0 pypi
icu 58.2 he6710b0_3
idna 3.3 pypi_0 pypi
importlib-metadata 4.11.3 pypi_0 pypi
importlib_metadata 4.8.2 hd3eb1b0_0
intel-openmp 2021.4.0 h06a4308_3561
ipykernel 6.9.1 py38h06a4308_0
ipython 8.1.1 py38h06a4308_0
ipython_genutils 0.2.0 pyhd3eb1b0_1
ipywidgets 7.6.5 pyhd3eb1b0_1
janome 0.4.2 pypi_0 pypi
jedi 0.18.1 py38h06a4308_1
jinja2 3.0.3 pyhd3eb1b0_0
joblib 1.1.0 pypi_0 pypi
jpeg 9d h7f8727e_0
jsonschema 3.2.0 pyhd3eb1b0_2
jupyter 1.0.0 py38_7
jupyter_client 7.1.2 pyhd3eb1b0_0
jupyter_console 6.4.3 pyhd3eb1b0_0
jupyter_core 4.9.2 py38h06a4308_0
jupyterlab_pygments 0.1.2 py_0
jupyterlab_widgets 1.0.0 pyhd3eb1b0_1
keras 2.8.0 pypi_0 pypi
keras-preprocessing 1.1.2 pypi_0 pypi
kiwisolver 1.4.0 pypi_0 pypi
konoha 4.6.5 pypi_0 pypi
langcodes 3.3.0 pypi_0 pypi
langdetect 1.0.9 pypi_0 pypi
lcms2 2.12 h3be6417_0
ld_impl_linux-64 2.35.1 h7274673_9
libclang 13.0.0 pypi_0 pypi
libffi 3.3 he6710b0_2
libgcc-ng 9.3.0 h5101ec6_17
libgomp 9.3.0 h5101ec6_17
libpng 1.6.37 hbc83047_0
libsodium 1.0.18 h7b6447c_0
libstdcxx-ng 9.3.0 hd4cf53a_17
libtiff 4.2.0 h85742a9_0
libuuid 1.0.3 h7f8727e_2
libuv 1.40.0 h7b6447c_0
libwebp 1.2.2 h55f646e_0
libwebp-base 1.2.2 h7f8727e_0
libxcb 1.14 h7b6447c_0
libxml2 2.9.12 h03d6c58_0
llvmlite 0.38.0 pypi_0 pypi
lxml 4.8.0 pypi_0 pypi
lz4-c 1.9.3 h295c915_1
markdown 3.3.6 pypi_0 pypi
markupsafe 2.1.1 pypi_0 pypi
matplotlib 3.5.1 pypi_0 pypi
matplotlib-inline 0.1.2 pyhd3eb1b0_2
mistune 0.8.4 py38h7b6447c_1000
mkl 2021.4.0 h06a4308_640
mkl-service 2.4.0 py38h7f8727e_0
mkl_fft 1.3.1 py38hd3c417c_0
mkl_random 1.2.2 py38h51133e4_0
mpld3 0.3 pypi_0 pypi
murmurhash 1.0.6 pypi_0 pypi
nbclient 0.5.11 pyhd3eb1b0_0
nbconvert 6.3.0 py38h06a4308_0
nbformat 5.1.3 pyhd3eb1b0_0
ncurses 6.3 h7f8727e_2
nest-asyncio 1.5.1 pyhd3eb1b0_0
networkx 2.7.1 pypi_0 pypi
ninja 1.10.2 py38hd09550d_3
notebook 6.4.8 py38h06a4308_0
numba 0.55.1 pypi_0 pypi
numpy 1.21.5 pypi_0 pypi
numpy-base 1.21.2 py38h79a1101_0
oauthlib 3.2.0 pypi_0 pypi
openssl 1.1.1m h7f8727e_0
opt-einsum 3.3.0 pypi_0 pypi
overrides 3.1.0 pypi_0 pypi
packaging 21.3 pyhd3eb1b0_0
pandas 1.4.1 pypi_0 pypi
pandocfilters 1.5.0 pyhd3eb1b0_0
parso 0.8.3 pyhd3eb1b0_0
pathy 0.6.1 pypi_0 pypi
pcre 8.45 h295c915_0
pexpect 4.8.0 pyhd3eb1b0_3
pickleshare 0.7.5 pyhd3eb1b0_1003
pillow 9.0.1 pypi_0 pypi
pip 21.2.4 py38h06a4308_0
plotly 5.6.0 pypi_0 pypi
preshed 3.0.6 pypi_0 pypi
prometheus_client 0.13.1 pyhd3eb1b0_0
prompt-toolkit 3.0.20 pyhd3eb1b0_0
prompt_toolkit 3.0.20 hd3eb1b0_0
protobuf 3.19.4 pypi_0 pypi
ptyprocess 0.7.0 pyhd3eb1b0_2
pure_eval 0.2.2 pyhd3eb1b0_0
py4j 0.10.9.5 pypi_0 pypi
pyasn1 0.4.8 pypi_0 pypi
pyasn1-modules 0.2.8 pypi_0 pypi
pycparser 2.21 pyhd3eb1b0_0
pydantic 1.8.2 pypi_0 pypi
pygments 2.11.2 pyhd3eb1b0_0
pynndescent 0.5.6 pypi_0 pypi
pyparsing 3.0.7 pypi_0 pypi
pyqt 5.9.2 py38h05f1152_4
pyrsistent 0.18.0 py38heee7806_0
pysocks 1.7.1 pypi_0 pypi
python 3.8.12 h12debd9_0
python-dateutil 2.8.2 pyhd3eb1b0_0
pytorch 1.7.0 py3.8_cuda11.0.221_cudnn8.0.3_0 pytorch
pytz 2022.1 pypi_0 pypi
pyyaml 5.4.1 pypi_0 pypi
pyzmq 22.3.0 py38h295c915_2
qt 5.9.7 h5867ecd_1
qtconsole 5.2.2 pyhd3eb1b0_0
qtpy 1.11.2 pyhd3eb1b0_0
readline 8.1.2 h7f8727e_1
regex 2022.3.15 pypi_0 pypi
requests 2.27.1 pypi_0 pypi
requests-oauthlib 1.3.1 pypi_0 pypi
rsa 4.8 pypi_0 pypi
sacremoses 0.0.49 pypi_0 pypi
scikit-learn 1.0.2 pypi_0 pypi
scipy 1.8.0 pypi_0 pypi
segtok 1.5.11 pypi_0 pypi
send2trash 1.8.0 pyhd3eb1b0_1
sentence-transformers 1.2.1 pypi_0 pypi
sentencepiece 0.1.91 pypi_0 pypi
setuptools 58.0.4 py38h06a4308_0
sip 4.19.13 py38h295c915_0
six 1.16.0 pyhd3eb1b0_1
smart-open 5.2.1 pypi_0 pypi
soupsieve 2.3.1 pypi_0 pypi
spacy 3.2.3 pypi_0 pypi
spacy-legacy 3.0.9 pypi_0 pypi
spacy-loggers 1.0.1 pypi_0 pypi
sqlite 3.38.0 hc218d9a_0
sqlitedict 2.0.0 pypi_0 pypi
srsly 2.4.2 pypi_0 pypi
stack_data 0.2.0 pyhd3eb1b0_0
tabulate 0.8.9 pypi_0 pypi
tenacity 8.0.1 pypi_0 pypi
tensorboard 2.8.0 pypi_0 pypi
tensorboard-data-server 0.6.1 pypi_0 pypi
tensorboard-plugin-wit 1.8.1 pypi_0 pypi
tensorflow 2.8.0 pypi_0 pypi
tensorflow-hub 0.12.0 pypi_0 pypi
tensorflow-io-gcs-filesystem 0.24.0 pypi_0 pypi
tensorflow-text 2.8.1 pypi_0 pypi
termcolor 1.1.0 pypi_0 pypi
terminado 0.13.1 py38h06a4308_0
testpath 0.5.0 pyhd3eb1b0_0
tf-estimator-nightly 2.8.0.dev2021122109 pypi_0 pypi
thinc 8.0.15 pypi_0 pypi
threadpoolctl 3.1.0 pypi_0 pypi
tk 8.6.11 h1ccaba5_0
tokenizers 0.9.3 pypi_0 pypi
torch 1.7.0 pypi_0 pypi
torchaudio 0.7.0 py38 pytorch
torchvision 0.8.1 pypi_0 pypi
tornado 6.1 py38h27cfd23_0
tqdm 4.63.0 pypi_0 pypi
traitlets 5.1.1 pyhd3eb1b0_0
transformers 3.5.1 pypi_0 pypi
typer 0.4.0 pypi_0 pypi
typing-extensions 4.1.1 pypi_0 pypi
typing_extensions 3.10.0.2 pyh06a4308_0
umap-learn 0.5.2 pypi_0 pypi
urllib3 1.26.9 pypi_0 pypi
wasabi 0.9.0 pypi_0 pypi
wcwidth 0.2.5 pyhd3eb1b0_0
webencodings 0.5.1 py38_1
werkzeug 2.0.3 pypi_0 pypi
wheel 0.37.1 pyhd3eb1b0_0
widgetsnbextension 3.5.2 py38h06a4308_0
wrapt 1.14.0 pypi_0 pypi
xz 5.2.5 h7b6447c_0
zeromq 4.3.4 h2531618_0
zipp 3.7.0 pyhd3eb1b0_0
zlib 1.2.11 h7f8727e_4
zstd 1.4.9 haebb681_0
Did I miss something in the implementation? Has anyone had the same issue?
Your code is working for me in a clean environment. I would suggest starting from a fresh environment and installing BERTopic with
pip install --upgrade bertopic
as that installs most dependencies at their newest versions. For example, I can see that sentence-transformers is 1.2.1 in your environment; although that should not be an issue in theory, some specific versions might clash, so upgrading these dependencies might solve your issue.

Good to know, thanks!