feat: Implement Google Business Profile API for review crawling and refine review filtering logic.
This commit is contained in:
parent
00cdae8d88
commit
c5d207ae2b
Binary file not shown.
1035
crawler.log
1035
crawler.log
File diff suppressed because it is too large
Load Diff
@ -158,6 +158,7 @@ def crawl_reviews():
|
||||
rating = rating_map.get(rating_str, 0)
|
||||
|
||||
language = None # The API might not return language explicitly
|
||||
phone = review.get("phone") # Extract phone if provided
|
||||
|
||||
# Upsert logic
|
||||
# We still try to fill place_id if possible, but we don't have it here.
|
||||
@ -169,16 +170,17 @@ def crawl_reviews():
|
||||
|
||||
cur.execute("""
|
||||
INSERT INTO google_review (
|
||||
review_id, place_id, original_text, author_display_name, publish_time, rating, outlet_code, language
|
||||
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
|
||||
review_id, place_id, original_text, author_display_name, publish_time, rating, outlet_code, language, phone
|
||||
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
|
||||
ON CONFLICT (review_id) DO UPDATE SET
|
||||
original_text = EXCLUDED.original_text,
|
||||
author_display_name = EXCLUDED.author_display_name,
|
||||
rating = EXCLUDED.rating,
|
||||
language = EXCLUDED.language,
|
||||
publish_time = EXCLUDED.publish_time,
|
||||
phone = EXCLUDED.phone,
|
||||
updated_at = CURRENT_TIMESTAMP;
|
||||
""", (review_id, None, comment, author_name, publish_time, rating, outlet_code, language))
|
||||
""", (review_id, None, comment, author_name, publish_time, rating, outlet_code, language, phone))
|
||||
|
||||
outlet_upserted += 1
|
||||
total_upserted += 1
|
||||
|
||||
@ -38,6 +38,7 @@ def create_table():
|
||||
rating INTEGER,
|
||||
outlet_code VARCHAR(255),
|
||||
language VARCHAR(10),
|
||||
phone VARCHAR(50),
|
||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||
CONSTRAINT fk_outlet_code FOREIGN KEY (outlet_code) REFERENCES master_outlet(popcorn_code) ON DELETE SET NULL
|
||||
);
|
||||
|
||||
@ -2,3 +2,35 @@
|
||||
2026-02-25 17:04:03,177 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-25 17:04:24,449 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-25 17:04:24,449 - SCHEDULER - Scheduled to run every 1 hour. Waiting in background...
|
||||
2026-02-25 18:04:24,499 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-25 18:04:53,581 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-25 19:04:53,602 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-25 19:05:20,706 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-25 20:05:20,725 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-25 20:05:54,926 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-25 21:05:54,950 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-25 21:06:21,838 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-25 22:06:21,859 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-25 22:06:52,062 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-25 23:06:52,083 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-25 23:07:22,246 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 00:07:22,256 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 00:07:49,724 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 01:07:49,744 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 01:08:18,486 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 02:08:18,506 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 02:08:53,285 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 03:08:53,308 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 03:09:22,580 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 04:09:22,601 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 04:09:52,764 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 05:09:52,785 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 05:10:36,865 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 06:10:36,889 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 06:11:14,467 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 07:11:14,488 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 07:11:41,179 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 08:11:41,197 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 08:12:17,273 - SCHEDULER - Crawler execution finished successfully.
|
||||
2026-02-26 09:12:17,288 - SCHEDULER - Starting hourly crawler execution...
|
||||
2026-02-26 09:12:46,458 - SCHEDULER - Crawler execution finished successfully.
|
||||
|
||||
@ -1 +1 @@
|
||||
{"token": "ya29.a0ATkoCc7vMAnUzeNJOS58XOXup4ELIWeSD_sjoQ1Px40jW2-fcfZW8ed4KdXz9eNYRfvBbxh7cIxPZ7SmsS8eFgnAvzyk06L76HqIrcgnz8Ukpa5YGlEyVozd5MXO6TMKpn6wcMkxI9QH6rqCF0rwlx1cII1lolXf6qNTT_GjqfrWVcSYJD4YLDiyO3F06PFlNdQjJKxeaCgYKAQ4SARUSFQHGX2MiZbK-cs6GQdJB0mNRyyNZXA0207", "refresh_token": "1//0g25i7BBB1dGPCgYIARAAGBASNwF-L9IrJM0yRF25DdKtrNrfA41BGH2xipGw_WwMSakD2zgJQH_LoxVqanuFgzq1FxZlmqvR9gQ", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "804823156361-3mk31f6a14r6np9usmm2mo5qnjl9lk00.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://www.googleapis.com/auth/business.manage"], "universe_domain": "googleapis.com", "account": "", "expiry": "2026-02-25T10:38:07.053555Z"}
|
||||
{"token": "ya29.a0ATkoCc6zBz3ZXg9Y6QuY0rYBpmXV-LKa8WRGXkeexGJDtwTZrOJdv5wflvR8tsnrMiIH17d5slx3N-pJO56jzMa2D9lPw9YpFwxTaYfy1XBJVrAypCA8VvAKIIylmCkn6mgSpOytqzKwQoE116jB2aYrLcp-1w4xQR03RjeEDl17XVs2Qnepy8cdiLQn_GaJvBiMcqipaCgYKAUwSARUSFQHGX2MiUl8QO9IcH9RqLwF96-KqUA0207", "refresh_token": "1//0g25i7BBB1dGPCgYIARAAGBASNwF-L9IrJM0yRF25DdKtrNrfA41BGH2xipGw_WwMSakD2zgJQH_LoxVqanuFgzq1FxZlmqvR9gQ", "token_uri": "https://oauth2.googleapis.com/token", "client_id": "804823156361-3mk31f6a14r6np9usmm2mo5qnjl9lk00.apps.googleusercontent.com", "client_secret": "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", "scopes": ["https://www.googleapis.com/auth/business.manage"], "universe_domain": "googleapis.com", "account": "", "expiry": "2026-02-26T03:12:16.830406Z"}
|
||||
Loading…
Reference in New Issue
Block a user