# 2026-03-21 — pg-dedup: removes rows with a duplicate title, keeping the first (min id).
# Exercises: DELETE with a subquery, CTE, idempotency (a repeated call is safe).
import os

import psycopg2

# A row `t1` is a duplicate when an earlier row (smaller id) carries the same
# title. The predicate is defined once and shared by the COUNT and the DELETE
# so that "found" and "deleted" always describe the same set of rows.
# Rows whose title is NULL never satisfy `=` and are therefore never treated
# as duplicates of each other.
_IS_DUPLICATE = """
    EXISTS (
        SELECT 1 FROM terraform_demo_table t2
        WHERE t2.title = t1.title AND t2.id < t1.id
    )
"""

_COUNT_DUPLICATES_SQL = (
    "SELECT COUNT(*) FROM terraform_demo_table t1 WHERE" + _IS_DUPLICATE
)
_DELETE_DUPLICATES_SQL = (
    "DELETE FROM terraform_demo_table t1 WHERE" + _IS_DUPLICATE
)


def dedup(event):
    """Delete rows of ``terraform_demo_table`` that duplicate an earlier title.

    For each title the row with the smallest ``id`` is kept. Connection
    settings are read from the ``PGHOST``, ``PGPORT`` (default 5432),
    ``PGDATABASE``, ``PGUSER``, ``PGPASSWORD`` and ``PGSSLMODE`` (default
    ``"require"``) environment variables.

    Args:
        event: Mapping of options; ``None`` is treated as an empty mapping.
            ``event["dry_run"]`` set to ``true`` / ``1`` / ``yes``
            (case-insensitive, any type that stringifies to one of these)
            makes the call count duplicates without deleting anything.

    Returns:
        A dict with ``duplicates_found`` (rows counted as duplicates),
        ``deleted`` (rows actually removed, 0 on a dry run) and ``dry_run``
        (the parsed flag).

    Raises:
        KeyError: A required ``PG*`` environment variable is missing.
        psycopg2.Error: Connecting or executing a statement failed.
    """
    options = event or {}
    dry_run = str(options.get("dry_run", "false")).lower() in ("true", "1", "yes")

    conn = psycopg2.connect(
        host=os.environ["PGHOST"],
        port=int(os.environ.get("PGPORT", 5432)),
        dbname=os.environ["PGDATABASE"],
        user=os.environ["PGUSER"],
        password=os.environ["PGPASSWORD"],
        sslmode=os.environ.get("PGSSLMODE", "require"),
    )
    try:
        with conn.cursor() as cur:
            # Count how many duplicate rows exist.
            cur.execute(_COUNT_DUPLICATES_SQL)
            dupes_count = cur.fetchone()[0]

            deleted = 0
            if not dry_run and dupes_count > 0:
                # Same predicate as the count above: only rows that were
                # reported as duplicates are removed.
                cur.execute(_DELETE_DUPLICATES_SQL)
                deleted = cur.rowcount
                conn.commit()

        return {
            "duplicates_found": dupes_count,
            "deleted": deleted,
            "dry_run": dry_run,
        }
    finally:
        # Closing without a commit discards any open transaction, so a
        # failure or a dry run leaves the table untouched.
        conn.close()