reorganization

This commit is contained in:
Никита Тырин 2024-09-10 14:28:57 +03:00
parent 0f1088a066
commit 3a9ba2f320
8 changed files with 119 additions and 177 deletions

View File

@ -20,7 +20,7 @@ BASE_DIR = Path(__file__).resolve().parent.parent
# See https://docs.djangoproject.com/en/5.1/howto/deployment/checklist/
# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'django-insecure-k_$r^3e!9ycqnt0=+ur&sx#hsl44_+v3=al2$_gpnh3u^w!xj$'
SECRET_KEY = 'django-insecure-m!o^q^+en&_v%64&m8%d^%_olkzf7$8jbp0^4dph2=1rn=666m'
# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True
@ -31,7 +31,6 @@ ALLOWED_HOSTS = []
# Application definition
INSTALLED_APPS = [
#'rest_framework',
'sample_app',
'django.contrib.admin',
'django.contrib.auth',

View File

@ -15,10 +15,8 @@ Including another URLconf
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path, include
from django.urls import path
urlpatterns = [
path('admin/', admin.site.urls),
path('api/', include('sample_app.urls')), # Include your app's URLs
]

View File

@ -4,36 +4,23 @@ import glob
import uuid
import asyncio
from datetime import datetime, timedelta
import pandas as pd
import django
from asgiref.sync import sync_to_async
import pandas as pd
# #environment init for django
# current_dir = os.getcwd()
# relative_path = os.path.join(current_dir, 'gaia_orm')
# os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'gaia_orm.settings')
# sys.path.append(os.path.normpath(relative_path))
# #sys.path.append('/home/kityr/practiceproject/gaia_orm')
# django.setup()
#import models for both the sources and the files
from django.core.management.base import BaseCommand
from sample_app.models import GaiaSource, CatalogFile
class Command(BaseCommand):
help = 'Ingest CSV files into the database'
def handle(self, *args, **options):
#fetching the file list
directory = input("Please enter the path to the directory containing the csv files (or leave empty if the files are in the same directory as this script): ")
csv_files = glob.glob(os.path.join(directory, '*csv*'))
print(f"Files found: {len(csv_files)}.")
print("Populating the file database...")
self.stdout.write(f"Files found: {len(csv_files)}.")
self.stdout.write("Populating the file database...")
#initialize the counter
new_files_count = 0
#add files as catalogfile instances into the database
@ -48,7 +35,7 @@ for file_path in csv_files:
if created:
new_files_count += 1
#show how many duplicates were already in db
print(f"File database populated. {len(csv_files) - new_files_count} were already in the database.")
self.stdout.write(f"File database populated. {len(csv_files) - new_files_count} were already in the database.")
@ -81,7 +68,7 @@ def count_ingested_files():
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S")
print(f"[{current_time}] Starting the data ingestion.")
self.stdout.write(f"[{current_time}] Starting the data ingestion.")
#function that iterates over all catalog files and ingests sources from them
async def ingest_files():
@ -93,17 +80,18 @@ async def ingest_files():
for catalog_file in catalog_files:
if catalog_file.status == 'INGESTED':
print(f"Skipping {catalog_file.name} as it is already ingested.")
self.stdout.write(f"Skipping {catalog_file.name} as it is already ingested.")
continue
file_path = os.path.join(directory, catalog_file.name)
#print(file_path)
#self.stdout.write(file_path)
if os.path.exists(file_path): #check if the file exists at all just in case
pass
if catalog_file.status == 'IN_PROGRESS':
print(f"{catalog_file.name} seems to have been interrupted, starting over.")#if the file status is in_progress, it must've
self.stdout.write(f"{catalog_file.name} seems to have been interrupted, starting over.")#if the file status is in_progress, it must've
await delete_gaiasources_for_catalogfile(catalog_file) #been interrupted mid-ingest, delete all the
#associated sources if there are any and start over
@ -136,8 +124,8 @@ async def ingest_files():
ingested_files_count = await count_ingested_files()
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S") #Timestamp and progress print statement
print(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")
current_time = (datetime.now() + timedelta(hours=3)).strftime("%H:%M:%S") #Timestamp and progress self.stdout.write statement
self.stdout.write(f"[{current_time}] {ingested_files_count}/{len(catalog_files)}")

View File

@ -1,4 +1,4 @@
# Generated by Django 5.1.1 on 2024-09-09 12:01
# Generated by Django 5.1.1 on 2024-09-10 11:15
import django.db.models.deletion
import uuid
@ -18,7 +18,7 @@ class Migration(migrations.Migration):
fields=[
('uuid', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
('name', models.CharField(max_length=32)),
('status', models.CharField(choices=[('PENDING', 'Pending'), ('INGESTED', 'Ingested'), ('INDEXED', 'Indexed')], default='PENDING', max_length=10)),
('status', models.CharField(choices=[('PENDING', 'Pending'), ('IN_PROGRESS', 'In Progress'), ('INGESTED', 'Ingested'), ('INDEXED', 'Indexed')], default='PENDING', max_length=11)),
],
),
migrations.CreateModel(

View File

@ -1,18 +0,0 @@
# Generated by Django 5.1.1 on 2024-09-09 14:26
from django.db import migrations, models
class Migration(migrations.Migration):
    """Alter the ``status`` field of ``catalogfile`` in ``sample_app``.

    Depends on ``sample_app`` migration ``0001_initial``. The single
    operation redefines ``status`` as a ``CharField`` with four choices
    (PENDING, IN_PROGRESS, INGESTED, INDEXED), a default of ``'PENDING'``
    and ``max_length=11``.
    """

    dependencies = [
        ('sample_app', '0001_initial'),
    ]

    operations = [
        migrations.AlterField(
            model_name='catalogfile',
            name='status',
            field=models.CharField(
                choices=[
                    ('PENDING', 'Pending'),
                    ('IN_PROGRESS', 'In Progress'),
                    ('INGESTED', 'Ingested'),
                    ('INDEXED', 'Indexed'),
                ],
                default='PENDING',
                # 11 is the length of the longest choice key, 'IN_PROGRESS'.
                max_length=11,
            ),
        ),
    ]

View File

@ -1,7 +0,0 @@
from rest_framework import serializers
from .models import GaiaSource
class GaiaSourceSerializer(serializers.ModelSerializer):
    """``ModelSerializer`` bound to the ``GaiaSource`` model."""

    class Meta:
        # '__all__' asks the serializer to include every field of the model.
        fields = '__all__'
        model = GaiaSource

View File

@ -1,7 +1 @@
from django.urls import path
from .views import GaiaSourceListCreate, GaiaSourceDetail
# URL routes for the GaiaSource views: one collection route and one
# per-object route keyed by an integer ``pk`` path segment.
# NOTE(review): ``<int:pk>`` assumes GaiaSource has an integer primary key;
# the model is not visible here -- confirm against sample_app.models.
urlpatterns = [
    path(
        'GaiaSource/',
        GaiaSourceListCreate.as_view(),
        name='GaiaSource-list-create',
    ),
    path(
        'GaiaSource/<int:pk>/',
        GaiaSourceDetail.as_view(),
        name='GaiaSource-detail',
    ),
]

View File

@ -1,13 +1 @@
from django.shortcuts import render
from rest_framework import generics
from .models import GaiaSource
from .serializers import GaiaSourceSerializer
class GaiaSourceListCreate(generics.ListCreateAPIView):
    """``ListCreateAPIView`` over all ``GaiaSource`` objects.

    Uses ``GaiaSourceSerializer`` as its serializer class.
    """

    serializer_class = GaiaSourceSerializer
    queryset = GaiaSource.objects.all()
class GaiaSourceDetail(generics.RetrieveUpdateDestroyAPIView):
    """``RetrieveUpdateDestroyAPIView`` over all ``GaiaSource`` objects.

    Uses ``GaiaSourceSerializer`` as its serializer class.
    """

    serializer_class = GaiaSourceSerializer
    queryset = GaiaSource.objects.all()