39 lines
1.5 KiB
Python
39 lines
1.5 KiB
Python
# 2026-03-21 — pg-dedup: removes duplicate rows by title, keeping the first one (min id).
|
||
# Tests: DELETE with a subquery, CTE, idempotency (a repeated call is safe).
|
||
import os, psycopg2
|
||
|
||
def dedup(event):
    """Remove rows of ``terraform_demo_table`` that repeat an earlier title.

    A row is a duplicate when another row has an equal ``title`` and a
    smaller ``id``; the row with the smallest ``id`` per title is kept.
    Rows whose ``title`` is NULL are never treated as duplicates, because
    SQL equality does not match NULL. The same predicate drives both the
    count and the DELETE, so ``deleted`` can never cover rows that were not
    counted in ``duplicates_found``.

    Connection settings are read from the environment: ``PGHOST``,
    ``PGDATABASE``, ``PGUSER`` and ``PGPASSWORD`` are required; ``PGPORT``
    defaults to 5432 and ``PGSSLMODE`` to ``"require"``.

    Args:
        event: Mapping of options (``None`` is treated as empty). If
            ``event["dry_run"]`` stringifies to ``"true"``, ``"1"`` or
            ``"yes"`` (case-insensitive), duplicates are only counted.

    Returns:
        A dict with ``duplicates_found`` (count taken before any deletion),
        ``deleted`` (rows removed, 0 on a dry run or when nothing was
        found) and ``dry_run`` (the parsed flag).

    Raises:
        KeyError: If a required ``PG*`` environment variable is missing.
        Database errors raised by psycopg2 propagate to the caller.
    """
    options = event or {}
    dry_run = str(options.get("dry_run", "false")).lower() in ("true", "1", "yes")

    # One definition of "duplicate" shared by the COUNT and the DELETE, so
    # the two statements cannot drift apart. It is a constant, not user input.
    is_duplicate = """
        EXISTS (
            SELECT 1 FROM terraform_demo_table t2
            WHERE t2.title = t1.title AND t2.id < t1.id
        )
    """

    conn = psycopg2.connect(
        host=os.environ["PGHOST"],
        port=int(os.environ.get("PGPORT", 5432)),
        dbname=os.environ["PGDATABASE"],
        user=os.environ["PGUSER"],
        password=os.environ["PGPASSWORD"],
        sslmode=os.environ.get("PGSSLMODE", "require"),
    )
    try:
        with conn.cursor() as cur:
            # Count how many duplicate rows exist.
            cur.execute(
                "SELECT COUNT(*) FROM terraform_demo_table t1 WHERE " + is_duplicate
            )
            dupes_count = cur.fetchone()[0]

            deleted = 0
            if not dry_run and dupes_count > 0:
                cur.execute(
                    "DELETE FROM terraform_demo_table t1 WHERE " + is_duplicate
                )
                deleted = cur.rowcount
                conn.commit()

        return {"duplicates_found": dupes_count, "deleted": deleted, "dry_run": dry_run}
    finally:
        # Closing without a commit discards any uncommitted work, so a
        # failure part-way through leaves the table untouched.
        conn.close()
|