feat: Implement Google Business Profile API for review crawling and refine review filtering logic.

This commit is contained in:
Suherdy Yacob 2026-02-26 09:34:20 +07:00
parent 00cdae8d88
commit c5d207ae2b
6 changed files with 1074 additions and 4 deletions

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -158,6 +158,7 @@ def crawl_reviews():
rating = rating_map.get(rating_str, 0)
language = None # The API might not return language explicitly
phone = review.get("phone") # Extract phone if provided
# Upsert logic
# We still try to fill place_id if possible, but we don't have it here.
@@ -169,16 +170,17 @@ def crawl_reviews():
cur.execute("""
INSERT INTO google_review (
review_id, place_id, original_text, author_display_name, publish_time, rating, outlet_code, language
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
review_id, place_id, original_text, author_display_name, publish_time, rating, outlet_code, language, phone
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
ON CONFLICT (review_id) DO UPDATE SET
original_text = EXCLUDED.original_text,
author_display_name = EXCLUDED.author_display_name,
rating = EXCLUDED.rating,
language = EXCLUDED.language,
publish_time = EXCLUDED.publish_time,
phone = EXCLUDED.phone,
updated_at = CURRENT_TIMESTAMP;
""", (review_id, None, comment, author_name, publish_time, rating, outlet_code, language))
""", (review_id, None, comment, author_name, publish_time, rating, outlet_code, language, phone))
outlet_upserted += 1
total_upserted += 1

View File

@@ -38,6 +38,7 @@ def create_table():
rating INTEGER,
outlet_code VARCHAR(255),
language VARCHAR(10),
phone VARCHAR(50),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
CONSTRAINT fk_outlet_code FOREIGN KEY (outlet_code) REFERENCES master_outlet(popcorn_code) ON DELETE SET NULL
);

View File

@@ -2,3 +2,35 @@
2026-02-25 17:04:03,177 - SCHEDULER - Starting hourly crawler execution...
2026-02-25 17:04:24,449 - SCHEDULER - Crawler execution finished successfully.
2026-02-25 17:04:24,449 - SCHEDULER - Scheduled to run every 1 hour. Waiting in background...
2026-02-25 18:04:24,499 - SCHEDULER - Starting hourly crawler execution...
2026-02-25 18:04:53,581 - SCHEDULER - Crawler execution finished successfully.
2026-02-25 19:04:53,602 - SCHEDULER - Starting hourly crawler execution...
2026-02-25 19:05:20,706 - SCHEDULER - Crawler execution finished successfully.
2026-02-25 20:05:20,725 - SCHEDULER - Starting hourly crawler execution...
2026-02-25 20:05:54,926 - SCHEDULER - Crawler execution finished successfully.
2026-02-25 21:05:54,950 - SCHEDULER - Starting hourly crawler execution...
2026-02-25 21:06:21,838 - SCHEDULER - Crawler execution finished successfully.
2026-02-25 22:06:21,859 - SCHEDULER - Starting hourly crawler execution...
2026-02-25 22:06:52,062 - SCHEDULER - Crawler execution finished successfully.
2026-02-25 23:06:52,083 - SCHEDULER - Starting hourly crawler execution...
2026-02-25 23:07:22,246 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 00:07:22,256 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 00:07:49,724 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 01:07:49,744 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 01:08:18,486 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 02:08:18,506 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 02:08:53,285 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 03:08:53,308 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 03:09:22,580 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 04:09:22,601 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 04:09:52,764 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 05:09:52,785 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 05:10:36,865 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 06:10:36,889 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 06:11:14,467 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 07:11:14,488 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 07:11:41,179 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 08:11:41,197 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 08:12:17,273 - SCHEDULER - Crawler execution finished successfully.
2026-02-26 09:12:17,288 - SCHEDULER - Starting hourly crawler execution...
2026-02-26 09:12:46,458 - SCHEDULER - Crawler execution finished successfully.

View File

@@ -1 +1 @@
{"token": "ya29.a0ATkoCc7vMAnUzeNJOS58XOXup4ELIWeSD_sjoQ1Px40jW2-fcfZW8ed4KdXz9eNYRfvBbxh7cIxPZ7SmsS8eFgnAvzyk06L76HqIrcgnz8Ukpa5YGlEyVozd5MXO6TMKpn6wcMkxI9QH6rqCF0rwlx1cII1lolXf6qNTT_GjqfrWVcSYJD4YLDiyO3F06PFlNdQjJKxeaCgYKAQ4SARUSFQHGX2MiZbK-cs6GQdJB0mNRyyNZXA0207", "refresh_token": "1//0g25i7BBB1dGPCgYIARAAGBASNwF-L9IrJM0yRF25DdKtrNrfA41BGH2xipGw_WwMSakD2zgJQH_LoxVqanuFgzq1FxZlmqvR9gQ", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "804823156361-3mk31f6a14r6np9usmm2mo5qnjl9lk00.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://www.googleapis.com/auth/business.manage"], "universe_domain": "googleapis.com", "account": "", "expiry": "2026-02-25T10:38:07.053555Z"}
{"token": "ya29.a0ATkoCc6zBz3ZXg9Y6QuY0rYBpmXV-LKa8WRGXkeexGJDtwTZrOJdv5wflvR8tsnrMiIH17d5slx3N-pJO56jzMa2D9lPw9YpFwxTaYfy1XBJVrAypCA8VvAKIIylmCkn6mgSpOytqzKwQoE116jB2aYrLcp-1w4xQR03RjeEDl17XVs2Qnepy8cdiLQn_GaJvBiMcqipaCgYKAUwSARUSFQHGX2MiUl8QO9IcH9RqLwF96-KqUA0207", "refresh_token": "1//0g25i7BBB1dGPCgYIARAAGBASNwF-L9IrJM0yRF25DdKtrNrfA41BGH2xipGw_WwMSakD2zgJQH_LoxVqanuFgzq1FxZlmqvR9gQ", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "804823156361-3mk31f6a14r6np9usmm2mo5qnjl9lk00.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://www.googleapis.com/auth/business.manage"], "universe_domain": "googleapis.com", "account": "", "expiry": "2026-02-26T03:12:16.830406Z"}