Update webscraper.py

This commit is contained in:
WhatDidYouExpect 2025-01-21 19:01:01 +01:00 committed by GitHub
parent c87d97da28
commit 5d818a905e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -64,10 +64,6 @@ class WebScraper(commands.Cog):
return False
async def scrape_links(self, session, url, depth=2):
"""Recursively scrape links from a URL."""
if depth == 0 or url in self.visited_urls:
return
print(f"Scraping: {url}")
self.visited_urls.add(url)
@@ -81,10 +77,6 @@ class WebScraper(commands.Cog):
sentences = self.extract_sentences(paragraph.get_text())
self.save_to_json(sentences)
for link in soup.find_all('a', href=True):
full_url = urljoin(url, link['href'])
if full_url.startswith("http") and full_url not in self.visited_urls:
await self.scrape_links(session, full_url, depth - 1)
@commands.command()
async def start_scrape(self, ctx, start_url: str):