sless-primer/POSTGRES/code/pg-dedup/pg_dedup.py
2026-03-22 17:08:18 +04:00

39 lines
1.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

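# 2026-03-21 — pg-dedup: removes rows that duplicate a title, keeping the first one (min id).
# Exercises: DELETE with a subquery and idempotency (a repeated call is safe).
# NOTE: the original note also lists CTE, but the queries below use plain subqueries.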
import os, psycopg2
def dedup(event):
    """Delete rows of ``terraform_demo_table`` that duplicate an earlier title.

    A row is a duplicate when another row with an equal ``title`` and a
    smaller ``id`` exists, so the row with the lowest ``id`` per title is
    kept. Rows whose ``title`` is NULL are never duplicates, because SQL
    ``=`` does not evaluate to true for NULL operands.

    The same predicate drives both the count and the DELETE, so the rows
    removed are exactly the rows reported (absent concurrent writers). Once
    the duplicates are gone, a repeated call finds none and deletes nothing.

    Connection settings come from the environment: ``PGHOST``,
    ``PGDATABASE``, ``PGUSER`` and ``PGPASSWORD`` are required; ``PGPORT``
    defaults to 5432 and ``PGSSLMODE`` to ``"require"``.

    Args:
        event: Mapping with an optional ``dry_run`` entry. The values
            ``true``, ``1`` and ``yes`` (case-insensitive, compared after
            ``str()``) enable dry-run mode, in which rows are only counted.

    Returns:
        dict with ``duplicates_found`` (rows matching the predicate),
        ``deleted`` (rows removed; 0 in dry-run mode) and ``dry_run``.

    Raises:
        KeyError: If a required environment variable is missing.
        Errors raised by psycopg2 while connecting or querying propagate.
    """
    dry_run = str(event.get("dry_run", "false")).lower() in ("true", "1", "yes")

    # Single definition of "duplicate", shared by SELECT and DELETE.
    # The previous DELETE used NOT IN (SELECT MIN(id) ... GROUP BY title);
    # GROUP BY puts all NULL titles into one group, so that form also removed
    # NULL-titled rows that the count query never reported.
    duplicate_predicate = """
        EXISTS (
            SELECT 1 FROM terraform_demo_table t2
            WHERE t2.title = t1.title AND t2.id < t1.id
        )
    """

    conn = psycopg2.connect(
        host=os.environ["PGHOST"],
        port=int(os.environ.get("PGPORT", 5432)),
        dbname=os.environ["PGDATABASE"],
        user=os.environ["PGUSER"],
        password=os.environ["PGPASSWORD"],
        sslmode=os.environ.get("PGSSLMODE", "require"),
    )
    try:
        with conn.cursor() as cur:
            # Count how many duplicate rows exist.
            cur.execute(
                "SELECT COUNT(*) FROM terraform_demo_table t1 WHERE "
                + duplicate_predicate
            )
            dupes_count = cur.fetchone()[0]

            deleted = 0
            if not dry_run and dupes_count > 0:
                cur.execute(
                    "DELETE FROM terraform_demo_table t1 WHERE "
                    + duplicate_predicate
                )
                deleted = cur.rowcount
                conn.commit()
        return {
            "duplicates_found": dupes_count,
            "deleted": deleted,
            "dry_run": dry_run,
        }
    finally:
        # Closing without a commit discards any open transaction
        # (dry-run or error path).
        conn.close()