-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLoad_Data_Scripts.py
More file actions
55 lines (48 loc) · 1.79 KB
/
Load_Data_Scripts.py
File metadata and controls
55 lines (48 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import psycopg2
import sys
sys.stdout.reconfigure(encoding='utf-8')
# Step 1: Connect to PostgreSQL
conn = psycopg2.connect(
host="localhost",
port="5432",
dbname="Health_DataBase", # 🔁 Replace with your DB name
user="postgres", # 🔁 Replace with your DB username
password="postgres" # 🔁 Replace with your DB password
)
cur = conn.cursor()
# Step 2: Get absolute path to cleaned files
base_dir = os.path.dirname(os.path.abspath(__file__)) # gets /database/
clean_dir = os.path.abspath(os.path.join(base_dir, "../data/outputs/"))
# Step 3: File → Table mapping
file_to_table = {
"cleaned_DimPatient.csv": "dimpatient",
"cleaned_DimPhysician.csv": "dimphysician",
"cleaned_DimSpeciality.csv": "dimspeciality",
"cleaned_DimHospital.csv": "dimhospital",
"cleaned_DimPayer.csv": "dimpayer",
"cleaned_DimCptCode.csv": "dimcptcode",
"cleaned_DimDiagnosisCode.csv": "dimdiagnosiscode",
"cleaned_DimDate.csv": "dimdate",
"cleaned_DimTransaction.csv": "dimtransaction",
"cleaned_FactTable.csv": "facttable"
}
# Step 4: Load each CSV into corresponding table
for file, table in file_to_table.items():
file_path = os.path.join(clean_dir, file)
print(f"🔎 Looking for file: {file_path}")
if not os.path.exists(file_path):
print(f"❌ File not found: {file_path}")
continue
with open(file_path, "r", encoding="utf-8") as f:
next(f) # Skip header row
try:
cur.copy_expert(f"COPY {table} FROM STDIN WITH CSV HEADER DELIMITER ','", f)
print(f"✅ Loaded {file} into {table}")
except Exception as e:
print(f"❌ Error loading {file} → {table}: {e}")
# Step 5: Commit and close
conn.commit()
cur.close()
conn.close()
print("🚀 All done.")