These are outputs from the ths_ds_sanity script
ARROW_DIRECT May 10¶
first test pass
INFO:pynamodb.settings:Override settings for pynamo available /etc/pynamodb/global_default_settings.py
querying arrow/parquet dataset DIRECT_CLASSIC
calculation_id, uniq_rlzs, uniq_locs, uniq_imts, uniq_gmms, uniq_srcs, uniq_vs30, consistent
============================================================================================
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDE3, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDE4, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDI0, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDI2, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDI3, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDI5, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDM0, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDM1, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDM2, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDM5, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDMy, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDMz, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDQ0, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDQ2, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDQ4, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDQw, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDQy, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDU2, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDUw, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDUy, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDY4, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDY5, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDYw, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDc0, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDc3, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDc4, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDcw, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDg1, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDg2, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDgx, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDgy, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDgz, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDk0, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDk1, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDk2, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDk3, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDk5, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDkw, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDky, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NDkz, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NTA1, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NTA3, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NTA4, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NTA5, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NTAx, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NTAz, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NTEw, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkVGFzazoxMzI4NTEy, 2262897, 3991, 27, 21, 1, 1, True
Grand total: 96981300
NLOC IMT defragged¶
- repartition from CDC4_compact: real 1m50s
- sanity ALL: real 5m6.600s
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ time poetry run python scripts/migration/ths_r4_sanity.py count-rlz -S ARROW -D CDC4_NLOC_IMT -R ALL
INFO:pynamodb.settings:Override settings for pynamo available /etc/pynamodb/global_default_settings.py
querying arrow/parquet dataset CDC4_NLOC_IMT
calculation_id, uniq_rlzs, uniq_locs, uniq_imts, uniq_gmms, uniq_srcs, uniq_vs30, consistent
============================================================================================
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Ng==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Nw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Ng==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Nw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxMw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNg==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMg==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOQ==, 2262897, 3991, 27, 21, 1, 1, True
Grand total: 98274384
real 5m6.600s
user 17m58.700s
sys 2m18.136s
Parquet Defragged¶
- defrag from CDC4: real 14m22.811s
- sanity ALL: real 1m52.309s
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ time poetry run python scripts/migration/ths_r4_sanity.py count-rlz -S ARROW -D CDC4_compacted -R ALL
INFO:pynamodb.settings:Override settings for pynamo available /etc/pynamodb/global_default_settings.py
querying arrow/parquet dataset CDC4_compacted
calculation_id, uniq_rlzs, uniq_locs, uniq_imts, uniq_gmms, uniq_srcs, uniq_vs30, consistent
============================================================================================
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Ng==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Nw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Ng==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Nw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxMw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNg==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMg==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOQ==, 2262897, 3991, 27, 21, 1, 1, True
Grand total: 98274384
real 1m52.309s
user 6m24.979s
sys 1m33.974s
ARROW/arrow-partitioned¶
Build: real 695m51.701s
- a single pandas dataframe per subtask, instead of batching (See previous)...
- not really any faster to produce
- 57 GB on disk,
- more even file distribution.
- 8 times slower to execute sanity checks (compared to CDC4 in parquet)
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ time poetry run sanity count-rlz -S ARROW -D arrow-partitioned -R ALL
INFO:pynamodb.settings:Override settings for pynamo available /etc/pynamodb/global_default_settings.py
querying arrow/parquet dataset arrow-partitioned
calculation_id, uniq_rlzs, uniq_locs, uniq_imts, uniq_gmms, uniq_srcs, uniq_vs30, consistent
============================================================================================
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Ng==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Nw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Ng==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Nw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxMw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNg==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMg==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOQ==, 2262897, 3991, 27, 21, 1, 1, True
Grand total: 98274384
real 16m46.848s
dataset: pq-CDC4¶
Build: real 712m15.169s
notes:
- this used a single pandas dataframe per subtask instead of batching, which proved a little slower than the 250K batched approach...
- no change in overall size (25GB)
- smaller number of larger files = GOOD, but still a lot of very small files ??
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ ls -lath WORKING/ARROW/pq-CDC/*/*.parquet |wc
5262 47358 689322
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ ls -lath WORKING/ARROW/pq-CDC2/*/*.parquet |wc
4781 43029 631092
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ ls -lath WORKING/ARROW/pq-CDC4/*/*.parquet |wc
3136 28224 410816
Sanity¶
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ time poetry run sanity count-rlz -S ARROW -D pq-CDC4 -R ALL
INFO:pynamodb.settings:Override settings for pynamo available /etc/pynamodb/global_default_settings.py
querying arrow/parquet dataset pq-CDC4
calculation_id, uniq_rlzs, uniq_locs, uniq_imts, uniq_gmms, uniq_srcs, uniq_vs30, consistent
============================================================================================
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Ng==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Nw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Ng==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Nw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxMw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNg==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMQ==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMg==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMw==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNA==, 1293084, 3991, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNQ==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNg==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNw==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOA==, 2262897, 3991, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOQ==, 2262897, 3991, 27, 21, 1, 1, True
Grand total: 98274384
real 2m0.318s
user 6m35.745s
sys 1m33.922s
CDC3¶
sanity count-rlz -S LOCAL -R ALL -D pq-CDC3¶
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ time poetry run sanity count-rlz -S LOCAL -R ALL -D pq-CDC3
INFO:pynamodb.settings:Override settings for pynamo available /etc/pynamodb/global_default_settings.py
calculation_id, uniq_rlzs, uniq_locs, uniq_imts, uniq_gmms, uniq_srcs, uniq_vs30, consistent
============================================================================================
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mg==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mw==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Ng==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Nw==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mg==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mw==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Ng==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Nw==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MA==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxMw==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNA==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNQ==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNg==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNw==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOA==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOQ==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMA==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMQ==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMg==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMw==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNA==, 1293084, 3991, 27, 12, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNg==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNw==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMg==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMw==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNQ==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNg==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNw==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOA==, 2262897, 3991, 27, 21, - , - , -
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOQ==, 2262897, 3991, 27, 21, - , - , -
Grand total: 98274384
real 70m59.603s
user 67m39.088s
sys 2m22.657s
New dataset pq-CDC2 statistics¶
Grand total: 94925520, all calcs are consistent¶
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ poetry run sanity count-rlz -S ARROW -D pq-CDC2 -R all
INFO:pynamodb.settings:Override settings for pynamo available /etc/pynamodb/global_default_settings.py
NZ 0.1grid has 3741 locations
All (0.1 grid + SRWG + NZ) has 3991 locations
querying arrow/parquet dataset pq-CDC2
calculation_id, uniq_rlzs, uniq_locs, uniq_imts, uniq_gmms, uniq_srcs, uniq_vs30
================================================================================
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOQ==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMQ==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMw==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mg==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNw==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Ng==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMw==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNQ==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMg==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNg==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Ng==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOA==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNA==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MA==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNw==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mw==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNg==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mg==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxMw==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Nw==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNA==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNA==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNw==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNg==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMg==, 1249020, 3855, 27, 12, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Nw==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MQ==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mw==, 2185785, 3855, 27, 21, 1, 1
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMA==, 1249020, 3855, 27, 12, 1, 1
Grand total: 94925520
Dataset pq-CDC statistics¶
Grand total: 96166185, two calcs inconsistent¶
chrisbc@tryharder-ubuntu:/GNSDATA/LIB/toshi-hazard-store$ poetry run sanity count-rlz -S ARROW -D pq-CDC -R all
...
NZ 0.1grid has 3741 locations
All (0.1 grid + SRWG + NZ) has 3991 locations
querying arrow/parquet dataset pq-CDC
calculation_id, uniq_rlzs, uniq_locs, uniq_imts, uniq_gmms, uniq_srcs, uniq_vs30, consistent
============================================================================================
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mg==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Mw==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNQ==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMg==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOQ==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNA==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNw==, 2498040, 3855, 27, 12, 1, 1, False
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNg==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Nw==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMQ==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMg==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxMw==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNg==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1OQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1NA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzOQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mg==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1Ng==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNg==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MA==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMw==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzMA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyNw==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxOA==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU1MQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Mw==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMA==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Ng==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU2MQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0MQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyMw==, 1249020, 3855, 27, 12, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0NQ==, 2177430, 3841, 27, 21, 1, 1, False
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOA==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0Nw==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODU0OQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUzNw==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUyOQ==, 2185785, 3855, 27, 21, 1, 1, True
T3BlbnF1YWtlSGF6YXJkU29sdXRpb246MTMyODUxNA==, 1249020, 3855, 27, 12, 1, 1, True
Grand total: 96166185