
Accuracy drops after saving and loading the model (NER task)

See original GitHub issue

Python: 3.7.3, spaCy: 2.3.4

After each training epoch, I run the model on independent data to check how well it performs, and I save the precision from every iteration to a file.

But after saving the model and loading it again, I run it on the same data and get a much worse result. What could have gone wrong? If you need more code, I can show whatever is needed.

from spacy.scorer import Scorer
from spacy.gold import GoldParse

def evaluate(ner_model, examples):
    scorer = Scorer()
    for input_, annot in examples:
        # tokenize the raw text and attach the gold entities to it
        doc_gold_text = ner_model.make_doc(input_)
        gold = GoldParse(doc_gold_text, entities=annot)
        # run the full pipeline and score the prediction against the gold
        pred_value = ner_model(input_)
        scorer.score(pred_value, gold)
    return scorer.scores
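
In spaCy v2, scorer.scores is a plain dict, so the per-epoch NER metrics can be read out directly (a minimal sketch; nlp and scorer_test are assumed to be set up as in main below):

result = evaluate(nlp, scorer_test)
print(result['ents_p'], result['ents_r'], result['ents_f'])  # NER precision / recall / F-score
print(result['ents_per_type'])  # per-label breakdown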
def main(input_file=None, n_iter=300):
    
    scores = []
    
    with jsonlines.open('scorer_test.jsonl') as f:
        scorer_test = [line for line in f]
        
    scorer_test = [[x['text'], x['labels']] for x in scorer_test if is_need_doctype(x['text'], x['labels'])]
    
    if input_file and click.confirm('Do you want to update Training Data?', default=True):
        logger.info('Updating Training Data...')
        SPACY.NER_TRAINING_FILE.write_text(input_file.read_text())

    logger.info('Reading Training Data')
    train_data = Train.load_doccano_data(SPACY.NER_TRAINING_FILE)
    
    spacy.prefer_gpu()
    
    nlp = Model.load()
    
    nlp.tokenizer = create_custom_tokenizer(nlp)

    if "ner" not in nlp.pipe_names:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner)
    else:
        ner = nlp.get_pipe("ner")

    for _, annotations in train_data:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])

    optimizer = nlp.resume_training()
    move_names = list(ner.move_names)

    # get names of other pipes to disable them during training
    pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    # only train NER
    with nlp.disable_pipes(*other_pipes), warnings.catch_warnings():
        # show warnings for misaligned entity spans once
        warnings.filterwarnings("once", category=UserWarning, module='spacy')
        sizes = compounding(1.0, 16.0, 1.001)
        # batch up the examples using spaCy's minibatch
        for _ in tqdm(range(n_iter)):
            random.shuffle(train_data)
            batches = minibatch(train_data, size=sizes)
            losses = {}
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
            logger.info(f"Losses: {losses}")
            
            scores.append(evaluate(nlp, scorer_test))
            
    with jsonlines.open('scores-' + str(int(time.time())) + '.jsonl', mode='w') as writer:
        writer.write_all(scores)

    if click.confirm('Do you want to update Model?', default=True):
        Model.save(nlp, optimizer)
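
One asymmetry worth flagging in the script above: Model.save (shown further below) serializes the model inside nlp.use_params(optimizer.averages), while the per-epoch evaluate call runs on the raw, non-averaged weights. If those differ, the model on disk is not the model that was scored during training. A minimal sketch of scoring under the same averaged parameters (same names as in the script above):

# evaluate with the averaged weights that Model.save will later serialize
with nlp.use_params(optimizer.averages):
    scores.append(evaluate(nlp, scorer_test))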

custom tokenizer:

def create_custom_tokenizer(nlp):
    # treat dates like 01.02.2020 as a single prefix token
    prefix_re = spacy.util.compile_prefix_regex(tuple([r'\d{2}\.\d{2}\.\d{4}'] + list(nlp.Defaults.prefixes)))
    infix_re = spacy.util.compile_infix_regex(tuple([r'(\.)', r'(:)', r'(\()', r'(\))'] + list(nlp.Defaults.infixes)))
    suffixes = list(nlp.Defaults.suffixes)
    # replace the default '..' suffix rule with one matching '...' and longer
    suffixes.remove(r'\.\.+')
    suffixes.append(r'\.\.\.+')
    suffix_re = spacy.util.compile_suffix_regex(tuple([r'-'] + suffixes))
    return Tokenizer(nlp.vocab, nlp.Defaults.tokenizer_exceptions,
                     prefix_search=prefix_re.search,
                     infix_finditer=infix_re.finditer,
                     suffix_search=suffix_re.search,
                     token_match=None)
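
A quick sanity check that the custom rules are active (a hedged sketch; the sample string is made up):

nlp = spacy.load('de_core_news_lg')
nlp.tokenizer = create_custom_tokenizer(nlp)
print([t.text for t in nlp('01.02.2020 Antrag (Seite:3)')])
# expected roughly: ['01.02.2020', 'Antrag', '(', 'Seite', ':', '3', ')']
# the date is captured whole by the prefix rule, so the new '.' infix does not split it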

custom_seg:

boundary = re.compile('^[0-9]$')

def custom_seg(doc):
    # suppress sentence starts after enumeration markers and non-punctuation tokens,
    # so the parser can only open a sentence after "real" punctuation
    prev = doc[0].text
    length = len(doc)
    for index, token in enumerate(doc):
        # a '.' right after a single digit (e.g. "1.") is a list marker, not a sentence end
        is_list_marker = token.text == '.' and boundary.match(prev) and index != (length - 1)
        if is_list_marker or token.text in [':', ';', ',', '/', '*'] or not token.is_punct:
            next_t = index + 1
            # mark the following token (skipping whitespace) as not sentence-initial
            while next_t < length:
                doc[next_t].sent_start = False
                if doc[next_t].is_space:
                    next_t += 1
                else:
                    break
        prev = token.text
    return doc
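
To see the component’s effect, one can compare segmentation with and without it (a sketch; the sample text is illustrative):

nlp = spacy.load('de_core_news_lg')
nlp.add_pipe(custom_seg, name='custom_seg', before='parser')
doc = nlp('1. Antrag eingereicht am 01.02.2020. 2. Antrag geprüft.')
print([sent.text for sent in doc.sents])
# with custom_seg, the '1.' / '2.' enumeration markers should no longer open new sentences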

model load/save:

class Model:
    @classmethod
    def load(cls):
        logger.info('Loading model...')
        nlp = spacy.load('de_core_news_lg')
        # re-attach the custom segmenter fresh on every load
        if CUSTOM_SEG in nlp.pipe_names:
            nlp.remove_pipe(CUSTOM_SEG)
        nlp.add_pipe(custom_seg, name=CUSTOM_SEG, before='parser')
        logger.info(f'Successfully loaded {cls.get_meta(nlp)}')
        return nlp

    @classmethod
    def save(cls, nlp, optimizer=None):
        logger.info('Saving model...')
        nlp.meta['name'] = 'Registration Docs Parser'
        nlp.meta['version'] = datetime.now().strftime('%y.%m.%d %H:%M:%S')
        # drop the plain-function component: spacy.load can't restore it without a registered factory
        nlp.remove_pipe(CUSTOM_SEG)
        # serialize with the averaged weights rather than the raw training weights
        with nlp.use_params(optimizer.averages):
            nlp.to_disk(SPACY.MODEL_PATH)
        logger.info(f'Successfully saved {cls.get_meta(nlp)}')

    @staticmethod
    def get_meta(nlp):
        return f'{nlp.meta["name"]} ({nlp.meta["version"]})'

But after saving, I load the model like this:

def get_model():
    nlp = spacy.load('data/model')
    if 'custom_seg' in nlp.pipe_names:
        nlp.remove_pipe('custom_seg')
    nlp.add_pipe(custom_seg, name='custom_seg', before='parser')
    return nlp
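
Note that get_model re-adds custom_seg but never re-installs the custom tokenizer, whereas training in main ran with create_custom_tokenizer(nlp) in place. If the tokenizer on disk does not round-trip the custom rules exactly, the reloaded pipeline will tokenize differently from the one that was evaluated during training, which by itself can depress NER scores. A hedged sketch that restores both customizations after loading:

def get_model():
    nlp = spacy.load('data/model')
    # restore the same tokenizer that was active during training
    nlp.tokenizer = create_custom_tokenizer(nlp)
    if 'custom_seg' in nlp.pipe_names:
        nlp.remove_pipe('custom_seg')
    nlp.add_pipe(custom_seg, name='custom_seg', before='parser')
    return nlp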

Then I call the evaluate function on the same data from scorer_test.jsonl and get results that are much lower than those logged at each training iteration. For some labels, the results are even lower than after the first training epoch.

Issue Analytics

  • State: closed
  • Created: 3 years ago
  • Reactions: 5
  • Comments: 11 (6 by maintainers)

Top GitHub Comments

2 reactions
svlandeg commented, Dec 22, 2020

Right, so that means the models aren’t entirely the same - something must be going wrong with the IO.

If you can provide a minimal working script that we can run and that shows this error, we’ll be able to look in more detail and see whether there might be a bug here.

We’ll really need this minimal script to be able to investigate further - just one script that runs from start to finish and exhibits the error. Otherwise it’s too difficult for us to help debug.
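
For context, a minimal script along the lines being requested would train briefly, save, reload, and score both pipelines on identical data (a hypothetical sketch using the evaluate helper from above; the training example and paths are placeholders):

import random
import spacy
from spacy.util import minibatch

TRAIN_DATA = [('Berlin ist eine Stadt.', {'entities': [(0, 6, 'LOC')]})]

nlp = spacy.blank('de')
ner = nlp.create_pipe('ner')
nlp.add_pipe(ner)
ner.add_label('LOC')

optimizer = nlp.begin_training()
for _ in range(30):
    random.shuffle(TRAIN_DATA)
    for batch in minibatch(TRAIN_DATA, size=2):
        texts, annots = zip(*batch)
        nlp.update(texts, annots, sgd=optimizer, drop=0.2)

eval_data = [(t, a['entities']) for t, a in TRAIN_DATA]
before = evaluate(nlp, eval_data)
nlp.to_disk('/tmp/ner_repro')
after = evaluate(spacy.load('/tmp/ner_repro'), eval_data)
print(before['ents_f'], after['ents_f'])  # should match if IO round-trips cleanly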

1 reaction
svlandeg commented, Jan 21, 2021

Hi @Zimovik007, it’s not always easy/convenient for us to follow up on multiple issues within the same thread. I’ll go ahead and close this one as the original issue is resolved. If you’re still running into speed issues, feel free to open a new issue describing the context in more detail - including a minimal reproducible script, the versions of spaCy and the models you’re using, and the results you’re getting. Thanks!

Read more comments on GitHub

