Schedules don't work in production (dagster.check.CheckError), but tasks run OK when launched manually
See original GitHub issue
Summary
I have a Dagster service in a Docker Compose setup that uses PostgreSQL, Dagit, and the Dagster daemon with the default DagsterDaemonScheduler.
When I run this locally it works fine: the schedules run on time without errors.
But when I deploy it to production, the schedules don't run and give me the following error:
dagster.check.CheckError: Invariant failed. Description: Attempted to deserialize class "ExternalScheduleExecutionData" which is not in the whitelist.
File "/usr/local/lib/python3.8/site-packages/dagster/scheduler/scheduler.py", line 204, in launch_scheduled_runs_for_schedule
yield from _schedule_runs_at_time(
File "/usr/local/lib/python3.8/site-packages/dagster/scheduler/scheduler.py", line 254, in _schedule_runs_at_time
schedule_execution_data = repo_location.get_external_schedule_execution_data(
File "/usr/local/lib/python3.8/site-packages/dagster/core/host_representation/repository_location.py", line 682, in get_external_schedule_execution_data
return sync_get_external_schedule_execution_data_grpc(
File "/usr/local/lib/python3.8/site-packages/dagster/api/snapshot_schedule.py", line 45, in sync_get_external_schedule_execution_data_grpc
api_client.external_schedule_execution(
File "/usr/local/lib/python3.8/site-packages/dagster/grpc/client.py", line 277, in external_schedule_execution
return deserialize_json_to_dagster_namedtuple(
File "/usr/local/lib/python3.8/site-packages/dagster/serdes/serdes.py", line 241, in deserialize_json_to_dagster_namedtuple
dagster_namedtuple = _deserialize_json_to_dagster_namedtuple(
File "/usr/local/lib/python3.8/site-packages/dagster/serdes/serdes.py", line 252, in _deserialize_json_to_dagster_namedtuple
return _unpack_value(seven.json.loads(json_str), whitelist_map=whitelist_map)
File "/usr/local/lib/python3.8/site-packages/dagster/serdes/serdes.py", line 276, in _unpack_value
check.invariant(
File "/usr/local/lib/python3.8/site-packages/dagster/check/__init__.py", line 167, in invariant
raise CheckError(f"Invariant failed. Description: {desc}")
But if I run the tasks manually in production, they run OK. I read the documentation and it seems to be a type-check error, so I removed all the type annotations from the tasks, but nothing changed.
Reproduction
I don't know how to reproduce this, because it works fine in the local environment, the tasks run when launched manually in production, and I can't find the source of the error.
Dagit UI/UX Issue Screenshots
Error Runs on production
Additional Info about Your Environment
Example of repositories
repository.py
@repository
def cement_scraper_repository():
    """Return the definitions that make up the cement scraper repository.

    The list holds ``cement_scrapper_pipeline`` together with
    ``daily_cement_scrapper_schedule``.
    """
    definitions = [cement_scrapper_pipeline, daily_cement_scrapper_schedule]
    return definitions
@repository
def mysql_intercement_bi_repository():
    """Return the definitions that make up the MySQL Intercement BI repository.

    The list holds ``real_calls_pipeline`` together with
    ``daily_real_calls_schedule``.
    """
    definitions = [real_calls_pipeline, daily_real_calls_schedule]
    # The forecast definitions (forecast_pipeline, daily_forecast_schedule)
    # are intentionally left out: forecast is not used at the moment.
    return definitions
@repository
def call_center_repository():
    """Return the definitions that make up the call center repository.

    The list holds ``call_center_pipeline`` together with
    ``daily_call_center_schedule``.
    """
    definitions = [call_center_pipeline, daily_call_center_schedule]
    return definitions
@repository
def br_concrete_repository():
    """Return the definitions that make up the BR concrete repository.

    The list holds ``br_concrete_pipeline`` together with
    ``br_concrete_schedule``.
    """
    definitions = [br_concrete_pipeline, br_concrete_schedule]
    return definitions
Example of schedules
call_center.py
from datetime import time, datetime
from dagster import daily_schedule
@daily_schedule(
    pipeline_name="call_center_pipeline",
    start_date=datetime(2021, 1, 1),
    execution_time=time(hour=5, minute=15),
    execution_timezone='America/Argentina/Buenos_Aires',
)
def daily_call_center_schedule(date):
    """Build the run config for one scheduled run of ``call_center_pipeline``.

    Args:
        date: The value handed to the schedule function; it is formatted
            with ``strftime("%Y-%m-%d")``.

    Returns:
        A run-config dict with one entry per solid under ``"solids"``. Every
        entry carries the same formatted date at ``config.date.value``.
    """
    formatted_date = date.strftime("%Y-%m-%d")

    # Order matches the original literal so the resulting dict is identical.
    solid_names = (
        "get_service",
        "save_yesterday_general_values",
        "save_calls_for_options",
        "save_calls_status_for_month_by_year",
        "save_abandon_calls_count_by_day_and_queue",
        "save_top_15_client_calls_by_sales_representatives",
        "save_top_15_customers",
        "save_top_15_sales_representatives_calls_to_the_call_center",
        "save_top_no_customers",
        "save_customers_vs_no_customers",
    )

    solids_config = {
        name: {"config": {"date": {"value": formatted_date}}}
        for name in solid_names
    }
    return {"solids": solids_config}
Example of tasks
from datetime import date, datetime
from typing import Tuple
from dateutil.relativedelta import relativedelta
from dagster import solid, Failure, pipeline
from scrapper.call_center.factory import build_general_call_center_service
from scrapper.services.gitlab_logger import GitLabLogger
from scrapper.services.utils import get_yesterday_and_first_day_of_that_month
# Shared module-level logger instance; the solids in this module call
# ``logger.send(...)`` when a save step reports that nothing was saved.
logger = GitLabLogger()
@solid
def get_service(_):
    """Build and return the general call center service.

    The solid context argument is accepted but not used.
    """
    service = build_general_call_center_service()
    return service
@solid
def save_yesterday_general_values(context, service):
    """Save yesterday's general values through ``service``.

    The message returned by the service is always written to the solid log.
    When the service reports that nothing was saved, an alert is sent through
    the module-level ``logger`` and a ``Failure`` with the same message is
    raised.

    Args:
        context: Solid context; only ``context.log.info`` is used.
        service: Object exposing ``save_general_values_on_db_by_date``.

    Returns:
        The "saved" flag reported by the service (only reached when truthy).

    Raises:
        Failure: If the service reports that the values were not saved.
    """
    one_day = relativedelta(days=1)
    target_day = (date.today() - one_day).strftime('%Y-%m-%d')
    saved, message = service.save_general_values_on_db_by_date(
        initial_date=target_day,
        end_date=target_day,
    )

    # The message is logged first on both the success and the failure path.
    context.log.info(message)
    if saved:
        return saved

    logger.send(
        title='call_center',
        description=f'Could not save_yesterday_general_values:\n{message}',
        start_time=datetime.now(),
        severity=1,
    )
    raise Failure(message)
@solid
def save_calls_for_options(context, service, general_values_saved):
    """Save the calls-for-options data for yesterday's year through ``service``.

    The message returned by the service is always written to the solid log.
    When the service reports that nothing was saved, an alert is sent through
    the module-level ``logger`` and a ``Failure`` with the same message is
    raised.

    Args:
        context: Solid context; only ``context.log.info`` is used.
        service: Object exposing ``save_calls_for_options_by_year``.
        general_values_saved: Not read in the body; the pipeline passes the
            output of ``save_yesterday_general_values`` here, which makes
            this solid depend on it.

    Returns:
        The "saved" flag reported by the service (only reached when truthy).

    Raises:
        Failure: If the service reports that the data was not saved.
    """
    one_day = relativedelta(days=1)
    target_year = (date.today() - one_day).strftime('%Y')
    saved, message = service.save_calls_for_options_by_year(year=target_year)

    # The message is logged first on both the success and the failure path.
    context.log.info(message)
    if saved:
        return saved

    logger.send(
        title='call_center',
        description=f'Could not save_calls_for_options:\n{message}',
        start_time=datetime.now(),
        severity=1,
    )
    raise Failure(message)
@solid
def save_calls_status_for_month_by_year(context, service, general_values_saved):
    """Save the per-month call status for yesterday's year through ``service``.

    The message returned by the service is always written to the solid log.
    When the service reports that nothing was saved, an alert is sent through
    the module-level ``logger`` and a ``Failure`` with the same message is
    raised.

    Args:
        context: Solid context; only ``context.log.info`` is used.
        service: Object exposing ``save_calls_status_for_month_by_year``.
        general_values_saved: Not read in the body; the pipeline passes the
            output of ``save_yesterday_general_values`` here, which makes
            this solid depend on it.

    Returns:
        The "saved" flag reported by the service (only reached when truthy).

    Raises:
        Failure: If the service reports that the data was not saved.
    """
    one_day = relativedelta(days=1)
    target_year = (date.today() - one_day).strftime('%Y')
    saved, message = service.save_calls_status_for_month_by_year(year=target_year)

    # The message is logged first on both the success and the failure path.
    context.log.info(message)
    if saved:
        return saved

    logger.send(
        title='call_center',
        description=f'Could not save_calls_status_for_month_by_year:\n{message}',
        start_time=datetime.now(),
        severity=1,
    )
    raise Failure(message)
@solid
def save_abandon_calls_count_by_day_and_queue(context, service, general_values_saved):
    """Save yesterday's abandoned-call counts by day and queue through ``service``.

    The message returned by the service is always written to the solid log.
    When the service reports that nothing was saved, an alert is sent through
    the module-level ``logger`` and a ``Failure`` with the same message is
    raised.

    Args:
        context: Solid context; only ``context.log.info`` is used.
        service: Object exposing ``save_abandon_calls_count_by_day_and_queue``.
        general_values_saved: Not read in the body; the pipeline passes the
            output of ``save_yesterday_general_values`` here, which makes
            this solid depend on it.

    Returns:
        The "saved" flag reported by the service (only reached when truthy).

    Raises:
        Failure: If the service reports that the data was not saved.
    """
    one_day = relativedelta(days=1)
    target_day = (date.today() - one_day).strftime('%Y-%m-%d')
    saved, message = service.save_abandon_calls_count_by_day_and_queue(
        initial_date=target_day,
        end_date=target_day,
    )

    # The message is logged first on both the success and the failure path.
    context.log.info(message)
    if saved:
        return saved

    logger.send(
        title='call_center',
        description=f'Could not save_abandon_calls_count_by_day_and_queue:\n{message}',
        start_time=datetime.now(),
        severity=1,
    )
    raise Failure(message)
@solid
def save_top_15_client_calls_by_sales_representatives(context, service, general_values_saved):
    """Save the top-15 client calls by sales representatives through ``service``.

    The date range runs from the second value returned by
    ``get_yesterday_and_first_day_of_that_month`` (used as ``initial_date``)
    to the first one (used as ``end_date``).

    The message returned by the service is always written to the solid log.
    When the service reports that nothing was saved, an alert is sent through
    the module-level ``logger`` and a ``Failure`` with the same message is
    raised.

    Args:
        context: Solid context; only ``context.log.info`` is used.
        service: Object exposing
            ``save_top_15_client_calls_by_sales_representatives``.
        general_values_saved: Not read in the body; the pipeline passes the
            output of ``save_yesterday_general_values`` here, which makes
            this solid depend on it.

    Returns:
        The "saved" flag reported by the service (only reached when truthy).

    Raises:
        Failure: If the service reports that the data was not saved.
    """
    range_end, range_start = get_yesterday_and_first_day_of_that_month()
    saved, message = service.save_top_15_client_calls_by_sales_representatives(
        initial_date=range_start,
        end_date=range_end,
    )

    # The message is logged first on both the success and the failure path.
    context.log.info(message)
    if saved:
        return saved

    logger.send(
        title='call_center',
        description=f'Could not save_top_15_client_calls_by_sales_representatives:\n{message}',
        start_time=datetime.now(),
        severity=1,
    )
    raise Failure(message)
@solid
def save_top_15_customers(context, service, general_values_saved):
    """Save the top-15 customers through ``service``.

    The date range runs from the second value returned by
    ``get_yesterday_and_first_day_of_that_month`` (used as ``initial_date``)
    to the first one (used as ``end_date``).

    The message returned by the service is always written to the solid log.
    When the service reports that nothing was saved, an alert is sent through
    the module-level ``logger`` and a ``Failure`` with the same message is
    raised.

    Args:
        context: Solid context; only ``context.log.info`` is used.
        service: Object exposing ``save_top_15_customers``.
        general_values_saved: Not read in the body; the pipeline passes the
            output of ``save_yesterday_general_values`` here, which makes
            this solid depend on it.

    Returns:
        The "saved" flag reported by the service (only reached when truthy).

    Raises:
        Failure: If the service reports that the data was not saved.
    """
    range_end, range_start = get_yesterday_and_first_day_of_that_month()
    saved, message = service.save_top_15_customers(
        initial_date=range_start,
        end_date=range_end,
    )

    # The message is logged first on both the success and the failure path.
    context.log.info(message)
    if saved:
        return saved

    logger.send(
        title='call_center',
        description=f'Could not save_top_15_customers:\n{message}',
        start_time=datetime.now(),
        severity=1,
    )
    raise Failure(message)
@solid
def save_top_15_sales_representatives_calls_to_the_call_center(context, service, general_values_saved):
    """Save the top-15 sales-representative calls to the call center.

    The date range runs from the second value returned by
    ``get_yesterday_and_first_day_of_that_month`` (used as ``initial_date``)
    to the first one (used as ``end_date``).

    The message returned by the service is always written to the solid log.
    When the service reports that nothing was saved, an alert is sent through
    the module-level ``logger`` and a ``Failure`` with the same message is
    raised.

    Args:
        context: Solid context; only ``context.log.info`` is used.
        service: Object exposing
            ``save_top_15_sales_representatives_calls_to_the_call_center``.
        general_values_saved: Not read in the body; the pipeline passes the
            output of ``save_yesterday_general_values`` here, which makes
            this solid depend on it.

    Returns:
        The "saved" flag reported by the service (only reached when truthy).

    Raises:
        Failure: If the service reports that the data was not saved.
    """
    range_end, range_start = get_yesterday_and_first_day_of_that_month()
    saved, message = service.save_top_15_sales_representatives_calls_to_the_call_center(
        initial_date=range_start,
        end_date=range_end,
    )

    # The message is logged first on both the success and the failure path.
    context.log.info(message)
    if saved:
        return saved

    logger.send(
        title='call_center',
        description=f'Could not save_top_15_sales_representatives_calls_to_the_call_center:\n{message}',
        start_time=datetime.now(),
        severity=1,
    )
    raise Failure(message)
@solid
def save_top_no_customers(context, service, general_values_saved):
    """Save the top non-customers through ``service``.

    The date range runs from the second value returned by
    ``get_yesterday_and_first_day_of_that_month`` (used as ``initial_date``)
    to the first one (used as ``end_date``).

    The message returned by the service is always written to the solid log.
    When the service reports that nothing was saved, an alert is sent through
    the module-level ``logger`` and a ``Failure`` with the same message is
    raised.

    Args:
        context: Solid context; only ``context.log.info`` is used.
        service: Object exposing ``save_top_no_customers``.
        general_values_saved: Not read in the body; the pipeline passes the
            output of ``save_yesterday_general_values`` here, which makes
            this solid depend on it.

    Returns:
        The "saved" flag reported by the service (only reached when truthy).

    Raises:
        Failure: If the service reports that the data was not saved.
    """
    range_end, range_start = get_yesterday_and_first_day_of_that_month()
    saved, message = service.save_top_no_customers(
        initial_date=range_start,
        end_date=range_end,
    )

    # The message is logged first on both the success and the failure path.
    context.log.info(message)
    if saved:
        return saved

    logger.send(
        title='call_center',
        description=f'Could not save_top_no_customers:\n{message}',
        start_time=datetime.now(),
        severity=1,
    )
    raise Failure(message)
@solid
def save_customers_vs_no_customers(context, service, general_values_saved):
    """Save the customers-versus-non-customers data through ``service``.

    The date range runs from the second value returned by
    ``get_yesterday_and_first_day_of_that_month`` (used as ``initial_date``)
    to the first one (used as ``end_date``).

    The message returned by the service is always written to the solid log.
    When the service reports that nothing was saved, an alert is sent through
    the module-level ``logger`` and a ``Failure`` with the same message is
    raised.

    Args:
        context: Solid context; only ``context.log.info`` is used.
        service: Object exposing ``save_customers_vs_no_customers``.
        general_values_saved: Not read in the body; the pipeline passes the
            output of ``save_yesterday_general_values`` here, which makes
            this solid depend on it.

    Returns:
        The "saved" flag reported by the service (only reached when truthy).

    Raises:
        Failure: If the service reports that the data was not saved.
    """
    range_end, range_start = get_yesterday_and_first_day_of_that_month()
    saved, message = service.save_customers_vs_no_customers(
        initial_date=range_start,
        end_date=range_end,
    )

    # The message is logged first on both the success and the failure path.
    context.log.info(message)
    if saved:
        return saved

    logger.send(
        title='call_center',
        description=f'Could not save_customers_vs_no_customers:\n{message}',
        start_time=datetime.now(),
        severity=1,
    )
    raise Failure(message)
@pipeline
def call_center_pipeline():
    """Wire the call center solids into a single pipeline.

    ``get_service`` produces the service object, which is passed to every
    other solid. ``save_yesterday_general_values`` runs on that service, and
    its output is passed to each remaining solid as ``general_values_saved``,
    so all of them depend on it.
    """
    service = get_service()
    general_values_saved = save_yesterday_general_values(service=service)

    # NOTE: no ``if not general_values_saved: raise Failure(...)`` check here.
    # This function body only composes the dependency graph; the value
    # returned by a solid call is a wiring handle rather than the solid's
    # runtime result, so such a check could never fire. The guard is enforced
    # at run time instead: ``save_yesterday_general_values`` raises ``Failure``
    # itself when nothing was saved.
    save_calls_for_options(
        service=service, general_values_saved=general_values_saved
    )
    save_calls_status_for_month_by_year(
        service=service, general_values_saved=general_values_saved
    )
    save_abandon_calls_count_by_day_and_queue(
        service=service, general_values_saved=general_values_saved
    )
    save_top_15_client_calls_by_sales_representatives(
        service=service, general_values_saved=general_values_saved
    )
    save_top_15_customers(
        service=service, general_values_saved=general_values_saved
    )
    save_top_15_sales_representatives_calls_to_the_call_center(
        service=service, general_values_saved=general_values_saved
    )
    save_top_no_customers(
        service=service, general_values_saved=general_values_saved
    )
    save_customers_vs_no_customers(
        service=service, general_values_saved=general_values_saved
    )
Message from the maintainers:
Impacted by this bug? Give it a 👍. We factor engagement into prioritization.
Issue Analytics
- State:
- Created 2 years ago
- Reactions:1
- Comments:9 (4 by maintainers)
Top GitHub Comments
Could you update the versions of the libraries on scrappe_web_tasks to 0.11.12 like the rest of them to see if that fixes the problem?
You probably need to rebuild the scrappers_web_tasks service (and make sure that its Dockerfile is using the same requirements that you just listed there). I’d need to see your exact Dockerfile to give specific advice on how to do that, but the ultimate goal is to make sure that it’s installing dagster 0.11.10 and not some older version.