switched the backend back to numpy, which may fix the memory leak
parent a8854fe4a4
commit 0f3d33ac13
@@ -6,6 +6,7 @@ import json
 import glob
 import uuid
 import asyncio
+import tracemalloc
 from datetime import datetime, timedelta
 import pandas as pd
 import django
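
The only addition in this hunk is the tracemalloc import; where it gets called is not part of the rendered diff. As a hedged sketch, this is how tracemalloc is typically wired in to locate a leak in an ingestion loop like the one below (the placement and snapshot calls are assumptions, not code from this commit):

import tracemalloc

tracemalloc.start()                      # begin tracking allocations

# ... ingest a handful of files ...

snapshot = tracemalloc.take_snapshot()   # capture current allocations
for stat in snapshot.statistics("lineno")[:10]:
    print(stat)                          # top allocation sites by source line
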
@@ -17,10 +18,11 @@ from GaiaDBInterface.models import GaiaSource, CatalogFile
 
 class Command(BaseCommand):
     help = 'Ingest CSV files into the database'
 
 
     def handle(self, *args, **options):
 
 
         if os.path.exists('config.json'):
             with open('config.json', 'r') as config_file:
                 config = json.load(config_file)
@@ -124,7 +126,7 @@ class Command(BaseCommand):
                     file_path,
                     #comment='#',
                     header=1000,
-                    engine="pyarrow"
+                    #engine="pyarrow"
                 )
 
                 gaiasource_fields = [field.name for field in GaiaSource._meta.get_fields()] #get fields from the model
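
The functional change in this hunk is commenting out engine="pyarrow" in the pd.read_csv call, so parsing falls back to pandas' default C engine, which is presumably what "back to numpy" in the commit message refers to. A minimal sketch of the call in isolation (the wrapper function and path are placeholders, not code from this commit):

import pandas as pd

def read_catalog_csv(file_path):
    # Omitting engine="pyarrow" selects pandas' default C parser, the backend
    # this commit reverts to on suspicion that the pyarrow reader leaked memory.
    return pd.read_csv(
        file_path,
        header=1000,         # header row offset, unchanged from the diff
        # engine="pyarrow",  # the argument this commit comments out
    )
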
@@ -135,19 +137,19 @@ class Command(BaseCommand):
 
                 data_dict = df_filtered.to_dict(orient='records') #translate the df into a dict
 
-                df = None #free up memory
+                #df = None #free up memory
 
                 gaia_source_instances = [
                     GaiaSource(**data, catalog_file=catalog_file) for data in data_dict #create gaiasource instances, set the foreignkey
                 ]
 
-                data_dict = None #free up memory
+                #data_dict = None #free up memory
 
                 await update_catalog_file_status(catalog_file, 'IN_PROGRESS')
 
                 await bulk_create_gaia_sources(gaia_source_instances) #bulk-create instances from the dict
 
-                gaia_source_instances = None #free up memory
+                #gaia_source_instances = None #free up memory
 
                 await update_catalog_file_status(catalog_file,'INGESTED') #update the catalogfile instance status field to 'INGESTED'
 
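
This hunk comments out the manual reference drops (df, data_dict and gaia_source_instances rebound to None). If they are ever reinstated, a common companion is an explicit garbage-collection pass per file so any release shows up between iterations; the following is a sketch of that pattern, not part of this commit:

import gc

df = None                     # drop the DataFrame reference
data_dict = None              # drop the list of row dicts
gaia_source_instances = None  # drop the unsaved model instances
gc.collect()                  # force a collection so the release is visible to tracemalloc/RSS
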
@@ -157,6 +159,4 @@ class Command(BaseCommand):
                 self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")
 
 
+        asyncio.run(ingest_files())
 
-        asyncio.run(ingest_files())
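
The awaited helpers above (update_catalog_file_status, bulk_create_gaia_sources) are defined outside the rendered hunks. A minimal sketch of how such helpers are commonly written for the Django ORM, assuming asgiref's sync_to_async wrapper; the actual definitions in this repository may differ:

from asgiref.sync import sync_to_async

from GaiaDBInterface.models import GaiaSource

@sync_to_async
def bulk_create_gaia_sources(instances):
    # Insert all GaiaSource rows for one catalog file in a single batch.
    GaiaSource.objects.bulk_create(instances)

@sync_to_async
def update_catalog_file_status(catalog_file, status):
    # Persist the new status on the CatalogFile row.
    catalog_file.status = status
    catalog_file.save()
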