gitronald
diff --git a/‎WebSearcher/classifiers/header_text.py‎
Lines changed: 14 additions & 11 deletions b/‎WebSearcher/classifiers/header_text.py‎
Lines changed: 14 additions & 11 deletions
diff --git a/‎WebSearcher/classifiers/main.py‎
Lines changed: 31 additions & 1 deletion b/‎WebSearcher/classifiers/main.py‎
Lines changed: 31 additions & 1 deletion
diff --git a/‎WebSearcher/component_parsers/__init__.py‎
Lines changed: 4 additions & 0 deletions b/‎WebSearcher/component_parsers/__init__.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎WebSearcher/component_parsers/ads.py‎
Lines changed: 13 additions & 0 deletions b/‎WebSearcher/component_parsers/ads.py‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎WebSearcher/component_parsers/discussions_and_forums.py‎
Lines changed: 31 additions & 9 deletions b/‎WebSearcher/component_parsers/discussions_and_forums.py‎
Lines changed: 31 additions & 9 deletions
diff --git a/‎WebSearcher/component_parsers/knowledge.py‎
Lines changed: 18 additions & 5 deletions b/‎WebSearcher/component_parsers/knowledge.py‎
Lines changed: 18 additions & 5 deletions
diff --git a/‎WebSearcher/component_parsers/locations.py‎
Lines changed: 80 additions & 0 deletions b/‎WebSearcher/component_parsers/locations.py‎
Lines changed: 80 additions & 0 deletions
@@ -52,20 +52,21 @@ def _get_header_level_mapping(level) -> dict:
     TYPE_TO_H2_MAPPING = {
         "directions": ["Directions", 
                        "Ubicaciones"],
-        "discussions_and_forums": ["Discussions and forums"],
-        "general": ["Complementary Results", 
-                    "Web Result with Site Links", 
-                    "Web results", 
-                    "Resultados de la Web", 
-                    "AI-powered overview", 
-                    "Visión general creada por IA", 
-                    "Things to know", 
-                    "Cosas que debes saber"],
+        "discussions_and_forums": ["Discussions and forums",
+                                  "Questions & answers"],
+        "general": ["Complementary Results",
+                    "Web Result with Site Links",
+                    "Web results",
+                    "Resultados de la Web",
+                    "AI-powered overview",
+                    "Visión general creada por IA"],
         "images": ["Images", 
                    "Imágenes"],
         "jobs": ["Jobs", 
                  "Empleos"],
-        "knowledge": ["Calculator Result", 
+        "knowledge": ["Things to know",
+                      "Cosas que debes saber",
+                      "Calculator Result",
                       "Featured snippet from the web", "Fragmento destacado",
                       "Finance Results", "Resumen de Mercado",
                       "From sources across the web", 
@@ -84,7 +85,9 @@ def _get_header_level_mapping(level) -> dict:
                       "Albums", "Álbumes",
                       "About", "Información",
                       "Profiles", "Perfiles"],
-        "local_news": ["Local news", "Noticias Locales"],
+        "latest_from": ["Latest from"],
+        "local_news": ["Local news", "Noticias Locales",
+                       "Latest in local"],
         "local_results": [
             "Local Results",
             "Locations",
 
@@ -13,18 +13,21 @@ def classify(cmpt: bs4.element.Tag) -> str:
 
         # Ordered list of classifiers to try
         component_classifiers = [
+            ClassifyMain.locations,          # Check locations (hotels, etc.) before top_stories
             ClassifyMain.top_stories,        # Check top stories
             ClassifyMain.discussions_and_forums, # Check discussions and forums
             ClassifyHeaderText.classify,     # Check levels 2 & 3 header text
             ClassifyMain.news_quotes,        # Check news quotes
             ClassifyMain.img_cards,          # Check image cards
             ClassifyMain.images,             # Check images
+            ClassifyMain.ai_overview,        # Check AI overview
             ClassifyMain.knowledge_panel,    # Check knowledge panel
             ClassifyMain.knowledge_block,    # Check knowledge components
             ClassifyMain.banner,             # Check for banners
             ClassifyMain.finance_panel,      # Check finance panel (classify as knowledge)
             ClassifyMain.map_result,         # Check for map results
             ClassifyMain.general_questions,  # Check hybrid general questions
+            ClassifyMain.short_videos,       # Check short videos carousel
             ClassifyMain.twitter,            # Check twitter cards and results
             ClassifyMain.general,            # Check general components
             ClassifyMain.people_also_ask,    # Check people also ask
@@ -114,6 +117,15 @@ def images(cmpt: bs4.element.Tag) -> str:
         ]
         return 'images' if any(conditions) else "unknown"
 
+    @staticmethod
+    def ai_overview(cmpt: bs4.element.Tag) -> str:
+        """Label a component as 'knowledge' when it looks like an AI Overview.
+
+        A component matches when it contains a ``div`` with class ``Fzsovc``,
+        or when its first ``h2`` has the stripped text "AI Overview".
+        Anything else is reported as "unknown".
+        """
+        # Marker div: its presence alone is enough.
+        if cmpt.find("div", {"class": "Fzsovc"}):
+            return 'knowledge'
+
+        # Otherwise fall back to an exact match on the first h2's text.
+        header = cmpt.find("h2")
+        if header and header.get_text(strip=True) == "AI Overview":
+            return 'knowledge'
+        return "unknown"
+
     @staticmethod
     def knowledge_block(cmpt: bs4.element.Tag) -> str:
         """Classify knowledge block components"""
@@ -133,7 +145,7 @@ def knowledge_box(cmpt: bs4.element.Tag) -> str:
             bool(cmpt.find("div", {"jscontroller": "Z2bSc"}))
         )
         condition['maps'] = webutils.check_dict_value(attrs, "data-hveid", "CAMQAA")
-        condition['hotels'] = cmpt.find("div", {"class": "zd2Jbb"})
+        condition['locations'] = cmpt.find("div", {"class": "zd2Jbb"})
         condition['events'] = cmpt.find("g-card", {"class": "URhAHe"})
         condition['jobs'] = cmpt.find("g-card", {"class": "cvoI5e"})
         text_list = list(cmpt.stripped_strings)
@@ -176,6 +188,24 @@ def people_also_ask(cmpt: bs4.element.Tag) -> str:
         conditions = webutils.check_dict_value(cmpt.attrs, "class", class_list)
         return 'people_also_ask' if conditions else "unknown"
 
+    @staticmethod
+    def short_videos(cmpt: bs4.element.Tag) -> str:
+        """Return 'short_videos' for a component headed "Short videos".
+
+        Only the first ``span`` with ``role="heading"`` and class ``IFnjPb``
+        is inspected; its stripped text must equal "Short videos" exactly.
+        Returns "unknown" when there is no such span or the text differs.
+        """
+        title_span = cmpt.find('span', {'role': 'heading', 'class': 'IFnjPb'})
+        if not title_span:
+            return "unknown"
+        is_match = title_span.get_text(strip=True) == 'Short videos'
+        return 'short_videos' if is_match else "unknown"
+
+    @staticmethod
+    def locations(cmpt: bs4.element.Tag) -> str:
+        """Return 'locations' for hotel-style components, else "unknown".
+
+        The first element carrying ``role="heading"`` is read, and its
+        stripped text must begin with "Hotels" or "More Hotels".
+        """
+        first_heading = cmpt.find(attrs={'role': 'heading'})
+        if not first_heading:
+            return "unknown"
+        label = first_heading.get_text(strip=True)
+        # str.startswith accepts a tuple of prefixes and matches any of them.
+        if label.startswith(('Hotels', 'More Hotels')):
+            return 'locations'
+        return "unknown"
+
     @staticmethod
     def top_stories(cmpt: bs4.element.Tag) -> str:
         """Classify top stories components"""
 
@@ -18,11 +18,13 @@
 from .recent_posts import parse_recent_posts
 
 from .local_results import parse_local_results
+from .locations import parse_locations
 from .map_results import parse_map_results
 from .news_quotes import parse_news_quotes
 from .people_also_ask import parse_people_also_ask
 from .scholarly_articles import parse_scholarly_articles
 from .searches_related import parse_searches_related
+from .short_videos import parse_short_videos
 from .shopping_ads import parse_shopping_ads
 from .twitter_cards import parse_twitter_cards
 from .twitter_result import parse_twitter_result
@@ -54,13 +56,15 @@
     ('latest_from', parse_latest_from, 'Latest From'),
     ('local_news', parse_local_news, 'Local News'),
     ('local_results', parse_local_results, 'Local Results'),
+    ('locations', parse_locations, 'Locations'),
     ('map_results', parse_map_results, 'Map Results'),
     ('news_quotes', parse_news_quotes, 'News Quotes'),
     ('people_also_ask', parse_people_also_ask, 'People Also Ask'),
     ('perspectives', parse_perspectives, 'Perspectives & Opinions'),
     ('recent_posts', parse_recent_posts, 'Recent Posts'),
     ('scholarly_articles', parse_scholarly_articles, 'Scholar Articles'),
     ('searches_related', parse_searches_related, 'Related Searches'),
+    ('short_videos', parse_short_videos, 'Short Videos'),
     ('shopping_ads', parse_shopping_ads, 'Shopping Ad'),
     ('top_stories', parse_top_stories, 'Top Stories'),
     ('twitter_cards', parse_twitter_cards, 'Twitter Cards'),
 
@@ -183,6 +183,8 @@ def parse_ad_menu(sub: bs4.element.Tag) -> list:
     """Parse menu items for a large ad with additional subresults"""
 
     parsed_items = DetailsList()
+
+    # Format 1: MhgNwc items with MUxGbd sub-divs
     menu_items = sub.find_all('div', {'class': 'MhgNwc'})
     for item in menu_items:
         parsed_item = DetailsItem()
@@ -194,6 +196,17 @@ def parse_ad_menu(sub: bs4.element.Tag) -> list:
             else:
                 parsed_item.text = webutils.get_text(div) or ''
         parsed_items.append(parsed_item)
+
+    # Format 2: bOeY0b sitelinks section
+    if not parsed_items:
+        sitelink_div = sub.find('div', {'class': 'bOeY0b'})
+        if sitelink_div:
+            for link in sitelink_div.find_all('a', href=True):
+                text = link.get_text(strip=True)
+                href = link.get('href', '')
+                if text and href:
+                    parsed_items.append(DetailsItem(url=href, title=text))
+
     return parsed_items.to_dicts()
 
 
 
@@ -11,28 +11,50 @@
     ('div', {'class': 'VZGVuc'}),
 ]
 
+# Candidate (tag, attrs) selectors for the individual result items, listed in
+# the order in which parse_discussions_and_forums tries them.
+SUB_SELECTORS = [
+    ("div", {"class": "LJ7wUe"}),
+    ("div", {"class": "JlqpRe"}),
+    ("div", {"class": "EDblX"}),
+]
+
 
-def parse_discussions_and_forums(cmpt:bs4.element.Tag) -> list:
+def parse_discussions_and_forums(cmpt: bs4.element.Tag) -> list:
     """Parse a 'Discussions and forums' component"""
-    subs = cmpt.find_all("div", {"class":"LJ7wUe"})
-    parsed_list = [parse_discussions_and_forums_item(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
-    return parsed_list
+    # Try each (tag, attrs) pair from SUB_SELECTORS in order; the first pair
+    # that matches any element supplies every item, and later pairs are not tried.
+    for tag, attrs in SUB_SELECTORS:
+        subs = cmpt.find_all(tag, attrs)
+        if subs:
+            return [parse_item(sub, sub_rank) for sub_rank, sub in enumerate(subs)]
+    # No selector matched anything: return an empty result list.
+    return []
+
 
-def parse_discussions_and_forums_item(cmpt:bs4.element.Tag, sub_rank:int = 0) -> dict:
+def parse_item(cmpt: bs4.element.Tag, sub_rank: int = 0) -> dict:
     """Parse a 'Discussions and forums' subcomponent"""
+    # sub_type is always None here; title, url and cite are produced by the
+    # get_title, get_url and get_cite helpers respectively.
     return {
         "type": "discussions_and_forums",
         "sub_type": None,
         "sub_rank": sub_rank,
-        "title": webutils.get_text_by_selectors(cmpt, TITLE_SELECTORS),
+        "title": get_title(cmpt),
         "url": get_url(cmpt),
-        "cite": webutils.get_text_by_selectors(cmpt, CITE_SELECTORS)
+        "cite": get_cite(cmpt),
     }
 
+
+def get_title(sub):
+    """Return the title text of a subcomponent.
+
+    The TITLE_SELECTORS lookup is preferred; when it yields nothing truthy,
+    the text of a ``div`` with ``role="heading"`` is used instead.
+    """
+    return (
+        webutils.get_text_by_selectors(sub, TITLE_SELECTORS)
+        or webutils.get_text(sub, 'div', {'role': 'heading'})
+    )
+
+
+def get_cite(sub):
+    """Return the citation text of a subcomponent.
+
+    Delegates to ``webutils.get_text_by_selectors`` with CITE_SELECTORS.
+    """
+    cite = webutils.get_text_by_selectors(sub, CITE_SELECTORS)
+    return cite
+
+
 def get_url(sub):
     """Get URL from a subcomponent; try multiple, take first non-null"""
-    url_list = [webutils.get_link(sub, {"class":"v4kUNc"}),
+    # Candidates in priority order: the class-filtered lookup first, then the
+    # call with no attribute filter.
+    url_list = [webutils.get_link(sub, {"class": "v4kUNc"}),
                 webutils.get_link(sub)]
+    # Drop falsy candidates; None is returned when none remain.
     url_list = [url for url in url_list if url]
     return url_list[0] if url_list else None
-
 
@@ -79,12 +79,19 @@ def parse_knowledge_panel(cmpt, sub_rank=0) -> list:
     ):
         parsed['sub_type'] = 'finance'
 
-    elif cmpt.find('div', {'role':'button'}) and cmpt.find('div', {'role':'button'}).text == 'Dictionary':
+    elif (
+        cmpt.find('div', {'data-attrid': 'DictionaryHeader'}) or
+        (cmpt.find('div', {'role':'button'}) and cmpt.find('div', {'role':'button'}).text == 'Dictionary')
+    ):
         parsed['sub_type'] = 'dictionary'
-        span_first = cmpt.find('span', {'jsslot':''})
-        if span_first:
-            span = span_first.find_all('span')
-            details['text'] = get_text(span).split('Translate')[0] if span else None
+        vmod = cmpt.find('div', {'class': 'vmod'})
+        if vmod:
+            details['text'] = vmod.get_text(' ', strip=True).split('Translate')[0]
+        else:
+            span_first = cmpt.find('span', {'jsslot':''})
+            if span_first:
+                span = span_first.find_all('span')
+                details['text'] = get_text(span).split('Translate')[0] if span else None
 
     elif (
         cmpt.find('h2') and cmpt.find('h2').text == 'Translation Result' or
@@ -102,6 +109,12 @@ def parse_knowledge_panel(cmpt, sub_rank=0) -> list:
         span = cmpt.find_all(['span'])
         details['text'] = get_text(span) if span else None
 
+    elif cmpt.find('span', {'role': 'heading', 'class': 'IFnjPb'}):
+        heading_span = cmpt.find('span', {'role': 'heading', 'class': 'IFnjPb'})
+        if heading_span and heading_span.text.strip() in ('Things to know', 'Cosas que debes saber'):
+            parsed['sub_type'] = 'things_to_know'
+            details['heading'] = heading_span.text.strip()
+
     else:
         parsed['sub_type'] = 'panel'
         div = cmpt.find_all(['span','div','a'], string=True)
 
@@ -0,0 +1,80 @@
+"""Parser for locations components (hotels, etc.)"""
+
+import bs4
+from .. import webutils
+
+
+def parse_locations(cmpt: bs4.element.Tag) -> list:
+    """Parse a locations component into a list of result dicts.
+
+    The component is first classified with ``classify_locations_sub_type``.
+    Hotel components are handed to ``parse_hotels``; any other sub-type
+    produces a single-element list holding an error record.
+    """
+    sub_type = classify_locations_sub_type(cmpt)
+
+    # Guard: only the 'hotels' sub-type has a dedicated parser.
+    if sub_type != 'hotels':
+        return [{'type': 'locations', 'sub_rank': 0, 'error': f'unknown sub_type: {sub_type}'}]
+    return parse_hotels(cmpt)
+
+
+def classify_locations_sub_type(cmpt: bs4.element.Tag) -> str:
+    """Decide which kind of locations component this is.
+
+    Returns 'hotels' when the first element with ``role="heading"`` has
+    text containing "Hotel" (which also covers "Hotels"), or, failing that,
+    when any anchor's href contains '/travel/'. Returns 'unknown' otherwise.
+    """
+    def _is_travel_href(href):
+        # href is None for anchors without the attribute.
+        return href and '/travel/' in href
+
+    first_heading = cmpt.find(attrs={'role': 'heading'})
+    mentions_hotel = bool(first_heading) and 'Hotel' in first_heading.get_text(strip=True)
+
+    # The /travel/ link search only runs when the heading check did not match.
+    if mentions_hotel or cmpt.find('a', href=_is_travel_href):
+        return 'hotels'
+    return 'unknown'
+
+
+def parse_hotels(cmpt: bs4.element.Tag) -> list:
+    """Parse every hotel entry found in a locations component.
+
+    An anchor counts as a hotel entry when its href contains '/travel/' and
+    it holds a name ``div`` (class ``sxdlOc`` or ``BTPx6e``). Entries are
+    ranked in document order starting at 0. When no anchor qualifies, a
+    single-element list with an error record is returned instead.
+    """
+    hotel_links = [
+        link
+        for link in cmpt.find_all('a', href=True)
+        if '/travel/' in link.get('href', '')
+        and (link.find('div', {'class': 'sxdlOc'}) or link.find('div', {'class': 'BTPx6e'}))
+    ]
+
+    if hotel_links:
+        return [_parse_hotel_item(link, rank) for rank, link in enumerate(hotel_links)]
+
+    return [{'type': 'locations', 'sub_type': 'hotels', 'sub_rank': 0,
+             'error': 'no hotel items found'}]
+
+
+def _parse_hotel_item(a: bs4.element.Tag, sub_rank: int) -> dict:
+    """Build the result dict for one hotel anchor.
+
+    The title comes from the name ``div`` (class ``sxdlOc``, else
+    ``BTPx6e``), the text from the ``div`` with class ``S7Ajc``, and the url
+    from the anchor's href. Price, rating, review and star spans are passed
+    to ``_parse_hotel_details``. Missing title or text become None.
+    """
+    def _stripped_text(node):
+        return node.get_text(strip=True) if node else None
+
+    title_node = a.find('div', {'class': 'sxdlOc'}) or a.find('div', {'class': 'BTPx6e'})
+    summary_node = a.find('div', {'class': 'S7Ajc'})
+    details = _parse_hotel_details(
+        a.find('span', {'class': 'sRlU8b'}),
+        a.find('span', {'class': 'yi40Hd'}),
+        a.find('span', {'class': 'RDApEe'}),
+        a.find('span', {'class': 'NAkmnc'}),
+    )
+
+    return {
+        'type': 'locations',
+        'sub_type': 'hotels',
+        'sub_rank': sub_rank,
+        'title': _stripped_text(title_node),
+        'url': a.get('href'),
+        'text': _stripped_text(summary_node),
+        'cite': None,
+        'details': details,
+    }
+
+
+def _parse_hotel_details(price_span, rating_span, reviews_span, stars_span) -> dict:
+    """Collect the stripped text of whichever hotel metadata spans exist.
+
+    Keys appear in the order 'price', 'rating', 'reviews', 'stars', and a key
+    is present only when its span is truthy. Note that None, not an empty
+    dict, is returned when no span is supplied.
+    """
+    labelled_spans = (
+        ('price', price_span),
+        ('rating', rating_span),
+        ('reviews', reviews_span),
+        ('stars', stars_span),
+    )
+    details = {
+        label: span.get_text(strip=True)
+        for label, span in labelled_spans
+        if span
+    }
+    return details or None