import random

import requests
from bs4 import BeautifulSoup

# Shared User-Agent: identify ourselves politely and avoid basic bot blocking.
_HEADERS = {'User-Agent': 'Mozilla/5.0 (compatible; PrintServer/1.0)'}


def _element_text(el):
    """Extract cleaned text from a BeautifulSoup element.

    Converts <br> tags into newlines (mutates the element in place),
    strips each line, and drops blank lines.

    Returns:
        (text, line_count): the joined cleaned text and the number of
        non-empty lines, so callers can filter on both.
    """
    for br in el.find_all("br"):
        br.replace_with("\n")
    lines = [line.strip() for line in el.get_text().splitlines() if line.strip()]
    return "\n".join(lines), len(lines)


class JokeSource:
    """Abstract base class for a website we can scrape jokes from."""

    def get_name(self):
        """Return a human-readable name of the source (overridden by subclasses)."""
        return "Generic Source"

    def fetch_joke(self):
        """Return a single joke string or None if nothing was found."""
        raise NotImplementedError


class JednorozecJokeSource(JokeSource):
    """Scrapes jokes from https://vtipy.jednorozec.cz/."""

    def get_name(self):
        return "vtipy.jednorozec.cz"

    def fetch_joke(self):
        """Fetch the front page and return one random joke, or None.

        Raises:
            requests.RequestException: on network or HTTP errors; callers
                are expected to handle failures themselves.
        """
        url = "https://vtipy.jednorozec.cz/"
        response = requests.get(url, headers=_HEADERS, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        jokes = []

        # Strategy 1: look for CSS classes often used by blogs/joke sites
        # as containers for the joke text.
        potential_classes = ['post', 'entry', 'hentry', 'joke', 'vtip']
        for class_name in potential_classes:
            elements = soup.find_all(class_=lambda x: x and class_name in x.split())
            for el in elements:
                text, line_count = _element_text(el)
                # Filter out very short texts (titles, metadata) and cap
                # the line count as a safety limit against page-wide containers.
                if len(text) > 20 and line_count <= 20:
                    jokes.append(text)
            if jokes:
                break

        # Strategy 2: fall back to all paragraphs if no specific container matched.
        if not jokes:
            for p in soup.find_all('p'):
                text, line_count = _element_text(p)
                # Assume jokes are somewhat long paragraphs.
                if len(text) > 50 and line_count <= 20:
                    jokes.append(text)

        return random.choice(jokes) if jokes else None


class BestPageJokeSource(JokeSource):
    """Scrapes jokes from https://bestpage.cz/vtipy/."""

    def get_name(self):
        return "bestpage.cz"

    def fetch_joke(self):
        """Fetch the joke listing and return one random joke, or None.

        Raises:
            requests.RequestException: on network or HTTP errors; callers
                are expected to handle failures themselves.
        """
        url = "https://bestpage.cz/vtipy/"
        response = requests.get(url, headers=_HEADERS, timeout=10)
        # Consistent with JednorozecJokeSource: fail loudly on HTTP errors
        # instead of scraping an error page for "jokes".
        response.raise_for_status()
        # Older sites often use windows-1250 or iso-8859-2. Let requests
        # guess the charset, then decode via .text so the guess is actually
        # applied (parsing the raw .content bytes would ignore it).
        response.encoding = response.apparent_encoding
        soup = BeautifulSoup(response.text, 'html.parser')

        jokes = []
        # Bestpage is an older site, often laid out with tables or plain
        # paragraphs, so scan all three container kinds.
        for el in soup.find_all(['p', 'div', 'td']):
            text, line_count = _element_text(el)
            # Length window rejects navigation snippets and page-wide blobs.
            if 50 < len(text) < 1000 and line_count <= 20:
                jokes.append(text)

        return random.choice(jokes) if jokes else None