
Accuracy drops after saving and loading the model (NER task)

See original GitHub issue

Python: 3.7.3, spaCy: 2.3.4

After each training epoch, I run the model on independent data to check how well it performs, and I save the precision from every iteration to a file.

But after saving the model and loading it again, I run it on the same data and get a much worse result. What could have gone wrong? If you need more code, I can show whatever is needed.

from spacy.scorer import Scorer
from spacy.gold import GoldParse

def evaluate(ner_model, examples):
    scorer = Scorer()
    for input_, annot in examples:
        # tokenize the raw text and attach the gold entities to it
        doc_gold_text = ner_model.make_doc(input_)
        gold = GoldParse(doc_gold_text, entities=annot)
        # run the full pipeline and score the prediction against the gold
        pred_value = ner_model(input_)
        scorer.score(pred_value, gold)
    return scorer.scores
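
In spaCy v2, scorer.scores is a plain dict, so the per-epoch NER metrics can be read out directly (a minimal sketch; nlp and scorer_test are assumed to be set up as in main below):

result = evaluate(nlp, scorer_test)
print(result['ents_p'], result['ents_r'], result['ents_f'])  # NER precision / recall / F-score
print(result['ents_per_type'])  # per-label breakdown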
def main(input_file=None, n_iter=300):
    
    scores = []
    
    with jsonlines.open('scorer_test.jsonl') as f:
        scorer_test = [line for line in f]
        
    scorer_test = [[x['text'], x['labels']] for x in scorer_test if is_need_doctype(x['text'], x['labels'])]
    
    if input_file and click.confirm('Do you want to update Training Data?', default=True):
        logger.info('Updating Training Data...')
        SPACY.NER_TRAINING_FILE.write_text(input_file.read_text())

    logger.info('Reading Training Data')
    train_data = Train.load_doccano_data(SPACY.NER_TRAINING_FILE)
    
    spacy.prefer_gpu()
    
    nlp = Model.load()
    
    nlp.tokenizer = create_custom_tokenizer(nlp)

    if "ner" not in nlp.pipe_names:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner)
    else:
        ner = nlp.get_pipe("ner")

    for _, annotations in train_data:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])

    optimizer = nlp.resume_training()
    move_names = list(ner.move_names)

    # get names of other pipes to disable them during training
    pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    # only train NER
    with nlp.disable_pipes(*other_pipes), warnings.catch_warnings():
        # show warnings for misaligned entity spans once
        warnings.filterwarnings("once", category=UserWarning, module='spacy')
        sizes = compounding(1.0, 16.0, 1.001)
        # batch up the examples using spaCy's minibatch
        for _ in tqdm(range(n_iter)):
            random.shuffle(train_data)
            batches = minibatch(train_data, size=sizes)
            losses = {}
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(texts, annotations, sgd=optimizer, drop=0.2, losses=losses)
            logger.info(f"Losses: {losses}")
            
            scores.append(evaluate(nlp, scorer_test))
            
    with jsonlines.open('scores-' + str(int(time.time())) + '.jsonl', mode='w') as writer:
        writer.write_all(scores)

    if click.confirm('Do you want to update Model?', default=True):
        Model.save(nlp, optimizer)
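
One asymmetry worth flagging in the script above: Model.save (shown further below) serializes the model inside nlp.use_params(optimizer.averages), while the per-epoch evaluate call runs on the raw, non-averaged weights. If those differ, the model on disk is not the model that was scored during training. A minimal sketch of scoring under the same averaged parameters (same names as in the script above):

# evaluate with the averaged weights that Model.save will later serialize
with nlp.use_params(optimizer.averages):
    scores.append(evaluate(nlp, scorer_test))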

custom tokenizer:

def create_custom_tokenizer(nlp):
    # treat dates like 01.02.2020 as a single prefix token
    prefix_re = spacy.util.compile_prefix_regex(tuple([r'\d{2}\.\d{2}\.\d{4}'] + list(nlp.Defaults.prefixes)))
    infix_re = spacy.util.compile_infix_regex(tuple([r'(\.)', r'(:)', r'(\()', r'(\))'] + list(nlp.Defaults.infixes)))
    suffixes = list(nlp.Defaults.suffixes)
    # replace the default '..' suffix rule with one matching '...' and longer
    suffixes.remove(r'\.\.+')
    suffixes.append(r'\.\.\.+')
    suffix_re = spacy.util.compile_suffix_regex(tuple([r'-'] + suffixes))
    return Tokenizer(nlp.vocab, nlp.Defaults.tokenizer_exceptions,
                     prefix_search=prefix_re.search,
                     infix_finditer=infix_re.finditer,
                     suffix_search=suffix_re.search,
                     token_match=None)
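
A quick sanity check that the custom rules are active (a hedged sketch; the sample string is made up):

nlp = spacy.load('de_core_news_lg')
nlp.tokenizer = create_custom_tokenizer(nlp)
print([t.text for t in nlp('01.02.2020 Antrag (Seite:3)')])
# expected roughly: ['01.02.2020', 'Antrag', '(', 'Seite', ':', '3', ')']
# the date is captured whole by the prefix rule, so the new '.' infix does not split it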

custom_seg:

boundary = re.compile('^[0-9]$')

def custom_seg(doc):
    # suppress sentence starts after enumeration markers and non-punctuation tokens,
    # so the parser can only open a sentence after "real" punctuation
    prev = doc[0].text
    length = len(doc)
    for index, token in enumerate(doc):
        # a '.' right after a single digit (e.g. "1.") is a list marker, not a sentence end
        is_list_marker = token.text == '.' and boundary.match(prev) and index != (length - 1)
        if is_list_marker or token.text in [':', ';', ',', '/', '*'] or not token.is_punct:
            next_t = index + 1
            # mark the following token (skipping whitespace) as not sentence-initial
            while next_t < length:
                doc[next_t].sent_start = False
                if doc[next_t].is_space:
                    next_t += 1
                else:
                    break
        prev = token.text
    return doc
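
To see the component’s effect, one can compare segmentation with and without it (a sketch; the sample text is illustrative):

nlp = spacy.load('de_core_news_lg')
nlp.add_pipe(custom_seg, name='custom_seg', before='parser')
doc = nlp('1. Antrag eingereicht am 01.02.2020. 2. Antrag geprüft.')
print([sent.text for sent in doc.sents])
# with custom_seg, the '1.' / '2.' enumeration markers should no longer open new sentences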

model load/save:

class Model:
    @classmethod
    def load(cls):
        logger.info('Loading model...')
        nlp = spacy.load('de_core_news_lg')
        # re-attach the custom segmenter fresh on every load
        if CUSTOM_SEG in nlp.pipe_names:
            nlp.remove_pipe(CUSTOM_SEG)
        nlp.add_pipe(custom_seg, name=CUSTOM_SEG, before='parser')
        logger.info(f'Successfully loaded {cls.get_meta(nlp)}')
        return nlp

    @classmethod
    def save(cls, nlp, optimizer=None):
        logger.info('Saving model...')
        nlp.meta['name'] = 'Registration Docs Parser'
        nlp.meta['version'] = datetime.now().strftime('%y.%m.%d %H:%M:%S')
        # drop the plain-function component: spacy.load can't restore it without a registered factory
        nlp.remove_pipe(CUSTOM_SEG)
        # serialize with the averaged weights rather than the raw training weights
        with nlp.use_params(optimizer.averages):
            nlp.to_disk(SPACY.MODEL_PATH)
        logger.info(f'Successfully saved {cls.get_meta(nlp)}')

    @staticmethod
    def get_meta(nlp):
        return f'{nlp.meta["name"]} ({nlp.meta["version"]})'

But after saving, I load the model like this:

def get_model():
    nlp = spacy.load('data/model')
    if 'custom_seg' in nlp.pipe_names:
        nlp.remove_pipe('custom_seg')
    nlp.add_pipe(custom_seg, name='custom_seg', before='parser')
    return nlp
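
Note that get_model re-adds custom_seg but never re-installs the custom tokenizer, whereas training in main ran with create_custom_tokenizer(nlp) in place. If the tokenizer on disk does not round-trip the custom rules exactly, the reloaded pipeline will tokenize differently from the one that was evaluated during training, which by itself can depress NER scores. A hedged sketch that restores both customizations after loading:

def get_model():
    nlp = spacy.load('data/model')
    # restore the same tokenizer that was active during training
    nlp.tokenizer = create_custom_tokenizer(nlp)
    if 'custom_seg' in nlp.pipe_names:
        nlp.remove_pipe('custom_seg')
    nlp.add_pipe(custom_seg, name='custom_seg', before='parser')
    return nlp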

Then I call the evaluate function on the same data from scorer_test.jsonl and get results that are much lower than those logged at each training iteration. For some labels, the results are even lower than after the first training epoch.

Issue Analytics

  • State: closed
  • Created: 3 years ago
  • Reactions: 5
  • Comments: 11 (6 by maintainers)

Top GitHub Comments

2 reactions
svlandeg commented, Dec 22, 2020

Right, so that means the models aren’t entirely the same - something must be going wrong with the IO.

If you can provide a minimal working script that we can run and that shows this error, we’ll be able to look in more detail and see whether there might be a bug here.

We’ll really need this minimal script to be able to investigate further - just one script that runs from start to finish and exhibits the error. Otherwise it’s too difficult for us to help debug.
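
For context, a minimal script along the lines being requested would train briefly, save, reload, and score both pipelines on identical data (a hypothetical sketch using the evaluate helper from above; the training example and paths are placeholders):

import random
import spacy
from spacy.util import minibatch

TRAIN_DATA = [('Berlin ist eine Stadt.', {'entities': [(0, 6, 'LOC')]})]

nlp = spacy.blank('de')
ner = nlp.create_pipe('ner')
nlp.add_pipe(ner)
ner.add_label('LOC')

optimizer = nlp.begin_training()
for _ in range(30):
    random.shuffle(TRAIN_DATA)
    for batch in minibatch(TRAIN_DATA, size=2):
        texts, annots = zip(*batch)
        nlp.update(texts, annots, sgd=optimizer, drop=0.2)

eval_data = [(t, a['entities']) for t, a in TRAIN_DATA]
before = evaluate(nlp, eval_data)
nlp.to_disk('/tmp/ner_repro')
after = evaluate(spacy.load('/tmp/ner_repro'), eval_data)
print(before['ents_f'], after['ents_f'])  # should match if IO round-trips cleanly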

1 reaction
svlandeg commented, Jan 21, 2021

Hi @Zimovik007, it’s not always easy/convenient for us to follow up on multiple issues within the same thread. I’ll go ahead and close this one as the original issue is resolved. If you’re still running into speed issues, feel free to open a new issue describing the context in more detail - including a minimal reproducible script, the versions of spaCy and the models you’re using, and the results you’re getting. Thanks!

Read more comments on GitHub

