feat: Implement Google Business Profile API for review crawling and refine review filtering logic.
This commit is contained in:
parent
00cdae8d88
commit
c5d207ae2b
Binary file not shown.
1035
crawler.log
1035
crawler.log
File diff suppressed because it is too large
Load Diff
@ -158,6 +158,7 @@ def crawl_reviews():
|
|||||||
rating = rating_map.get(rating_str, 0)
|
rating = rating_map.get(rating_str, 0)
|
||||||
|
|
||||||
language = None # The API might not return language explicitly
|
language = None # The API might not return language explicitly
|
||||||
|
phone = review.get("phone") # Extract phone if provided
|
||||||
|
|
||||||
# Upsert logic
|
# Upsert logic
|
||||||
# We still try to fill place_id if possible, but we don't have it here.
|
# We still try to fill place_id if possible, but we don't have it here.
|
||||||
@ -169,16 +170,17 @@ def crawl_reviews():
|
|||||||
|
|
||||||
cur.execute("""
|
cur.execute("""
|
||||||
INSERT INTO google_review (
|
INSERT INTO google_review (
|
||||||
review_id, place_id, original_text, author_display_name, publish_time, rating, outlet_code, language
|
review_id, place_id, original_text, author_display_name, publish_time, rating, outlet_code, language, phone
|
||||||
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
|
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||||||
ON CONFLICT (review_id) DO UPDATE SET
|
ON CONFLICT (review_id) DO UPDATE SET
|
||||||
original_text = EXCLUDED.original_text,
|
original_text = EXCLUDED.original_text,
|
||||||
author_display_name = EXCLUDED.author_display_name,
|
author_display_name = EXCLUDED.author_display_name,
|
||||||
rating = EXCLUDED.rating,
|
rating = EXCLUDED.rating,
|
||||||
language = EXCLUDED.language,
|
language = EXCLUDED.language,
|
||||||
publish_time = EXCLUDED.publish_time,
|
publish_time = EXCLUDED.publish_time,
|
||||||
|
phone = EXCLUDED.phone,
|
||||||
updated_at = CURRENT_TIMESTAMP;
|
updated_at = CURRENT_TIMESTAMP;
|
||||||
""", (review_id, None, comment, author_name, publish_time, rating, outlet_code, language))
|
""", (review_id, None, comment, author_name, publish_time, rating, outlet_code, language, phone))
|
||||||
|
|
||||||
outlet_upserted += 1
|
outlet_upserted += 1
|
||||||
total_upserted += 1
|
total_upserted += 1
|
||||||
|
|||||||
@ -38,6 +38,7 @@ def create_table():
|
|||||||
rating INTEGER,
|
rating INTEGER,
|
||||||
outlet_code VARCHAR(255),
|
outlet_code VARCHAR(255),
|
||||||
language VARCHAR(10),
|
language VARCHAR(10),
|
||||||
|
phone VARCHAR(50),
|
||||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
CONSTRAINT fk_outlet_code FOREIGN KEY (outlet_code) REFERENCES master_outlet(popcorn_code) ON DELETE SET NULL
|
CONSTRAINT fk_outlet_code FOREIGN KEY (outlet_code) REFERENCES master_outlet(popcorn_code) ON DELETE SET NULL
|
||||||
);
|
);
|
||||||
|
|||||||
@ -2,3 +2,35 @@
|
|||||||
2026-02-25 17:04:03,177 - SCHEDULER - Starting hourly crawler execution...
|
2026-02-25 17:04:03,177 - SCHEDULER - Starting hourly crawler execution...
|
||||||
2026-02-25 17:04:24,449 - SCHEDULER - Crawler execution finished successfully.
|
2026-02-25 17:04:24,449 - SCHEDULER - Crawler execution finished successfully.
|
||||||
2026-02-25 17:04:24,449 - SCHEDULER - Scheduled to run every 1 hour. Waiting in background...
|
2026-02-25 17:04:24,449 - SCHEDULER - Scheduled to run every 1 hour. Waiting in background...
|
||||||
|
2026-02-25 18:04:24,499 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-25 18:04:53,581 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-25 19:04:53,602 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-25 19:05:20,706 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-25 20:05:20,725 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-25 20:05:54,926 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-25 21:05:54,950 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-25 21:06:21,838 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-25 22:06:21,859 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-25 22:06:52,062 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-25 23:06:52,083 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-25 23:07:22,246 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 00:07:22,256 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 00:07:49,724 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 01:07:49,744 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 01:08:18,486 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 02:08:18,506 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 02:08:53,285 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 03:08:53,308 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 03:09:22,580 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 04:09:22,601 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 04:09:52,764 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 05:09:52,785 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 05:10:36,865 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 06:10:36,889 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 06:11:14,467 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 07:11:14,488 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 07:11:41,179 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 08:11:41,197 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 08:12:17,273 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
2026-02-26 09:12:17,288 - SCHEDULER - Starting hourly crawler execution...
|
||||||
|
2026-02-26 09:12:46,458 - SCHEDULER - Crawler execution finished successfully.
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
{"token": "ya29.a0ATkoCc7vMAnUzeNJOS58XOXup4ELIWeSD_sjoQ1Px40jW2-fcfZW8ed4KdXz9eNYRfvBbxh7cIxPZ7SmsS8eFgnAvzyk06L76HqIrcgnz8Ukpa5YGlEyVozd5MXO6TMKpn6wcMkxI9QH6rqCF0rwlx1cII1lolXf6qNTT_GjqfrWVcSYJD4YLDiyO3F06PFlNdQjJKxeaCgYKAQ4SARUSFQHGX2MiZbK-cs6GQdJB0mNRyyNZXA0207", "refresh_token": "1//0g25i7BBB1dGPCgYIARAAGBASNwF-L9IrJM0yRF25DdKtrNrfA41BGH2xipGw_WwMSakD2zgJQH_LoxVqanuFgzq1FxZlmqvR9gQ", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "804823156361-3mk31f6a14r6np9usmm2mo5qnjl9lk00.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://www.googleapis.com/auth/business.manage"], "universe_domain": "googleapis.com", "account": "", "expiry": "2026-02-25T10:38:07.053555Z"}
|
{"token": "ya29.a0ATkoCc6zBz3ZXg9Y6QuY0rYBpmXV-LKa8WRGXkeexGJDtwTZrOJdv5wflvR8tsnrMiIH17d5slx3N-pJO56jzMa2D9lPw9YpFwxTaYfy1XBJVrAypCA8VvAKIIylmCkn6mgSpOytqzKwQoE116jB2aYrLcp-1w4xQR03RjeEDl17XVs2Qnepy8cdiLQn_GaJvBiMcqipaCgYKAUwSARUSFQHGX2MiUl8QO9IcH9RqLwF96-KqUA0207", "refresh_token": "1//0g25i7BBB1dGPCgYIARAAGBASNwF-L9IrJM0yRF25DdKtrNrfA41BGH2xipGw_WwMSakD2zgJQH_LoxVqanuFgzq1FxZlmqvR9gQ", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "804823156361-3mk31f6a14r6np9usmm2mo5qnjl9lk00.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://www.googleapis.com/auth/business.manage"], "universe_domain": "googleapis.com", "account": "", "expiry": "2026-02-26T03:12:16.830406Z"}
|
||||||
Loading…
Reference in New Issue
Block a user