# File-viewer header (extraction residue): 46 lines, 1.3 KiB, Python
import logging
import subprocess
import time
from datetime import datetime

import schedule

# Set up basic logging for the scheduler: every record at INFO or above is
# written both to "scheduler.log" (relative to the current working directory)
# and to the console stream, tagged with "SCHEDULER" to tell these lines apart
# from other output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - SCHEDULER - %(message)s',
    handlers=[
        logging.FileHandler("scheduler.log"),
        logging.StreamHandler(),
    ],
)

def job(command=("./venv/bin/python", "crawler.py"), timeout=None):
    """Run the crawler once in a child process and log the outcome.

    The crawler is launched as a separate process so that every run starts
    from a clean memory state. Failures are logged instead of raised, so a
    failing run never propagates an exception to the caller.

    Args:
        command: Program and arguments to execute. Defaults to running
            ``crawler.py`` with ``./venv/bin/python``; both paths are
            resolved relative to the current working directory.
        timeout: Optional limit in seconds for a single run. ``None`` (the
            default) waits for the child indefinitely, as before. When the
            limit is exceeded the child is killed and the timeout is logged.

    Returns:
        None.
    """
    logging.info("Starting hourly crawler execution...")
    try:
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(list(command), check=True, timeout=timeout)
    except subprocess.CalledProcessError as e:
        logging.error(f"Crawler failed with exit code: {e.returncode}")
    except subprocess.TimeoutExpired as e:
        logging.error(f"Crawler timed out after {e.timeout} seconds")
    except Exception as e:
        # Top-level boundary: e.g. the interpreter or script path is missing.
        logging.error(f"Failed to run crawler: {e}")
    else:
        logging.info("Crawler execution finished successfully.")

if __name__ == "__main__":
    # Script entry point: run the crawler once, then once per hour until the
    # process is interrupted.
    logging.info("--- Crawler Scheduler Started ---")

    # Run once immediately on startup
    job()

    # Schedule to run every 1 hour
    schedule.every(1).hours.do(job)

    logging.info("Scheduled to run every 1 hour. Waiting in background...")

    # Keep the script running; Ctrl+C ends the loop with a final log line.
    try:
        while True:
            schedule.run_pending()
            time.sleep(60)  # Check every minute
    except KeyboardInterrupt:
        logging.info("Scheduler stopped by user.")