@@ -13,18 +13,21 @@ def classify(cmpt: bs4.element.Tag) -> str:
1313
1414 # Ordered list of classifiers to try
1515 component_classifiers = [
16+ ClassifyMain .locations , # Check locations (hotels, etc.) before top_stories
1617 ClassifyMain .top_stories , # Check top stories
1718 ClassifyMain .discussions_and_forums , # Check discussions and forums
1819 ClassifyHeaderText .classify , # Check levels 2 & 3 header text
1920 ClassifyMain .news_quotes , # Check news quotes
2021 ClassifyMain .img_cards , # Check image cards
2122 ClassifyMain .images , # Check images
23+ ClassifyMain .ai_overview , # Check AI overview
2224 ClassifyMain .knowledge_panel , # Check knowledge panel
2325 ClassifyMain .knowledge_block , # Check knowledge components
2426 ClassifyMain .banner , # Check for banners
2527 ClassifyMain .finance_panel , # Check finance panel (classify as knowledge)
2628 ClassifyMain .map_result , # Check for map results
2729 ClassifyMain .general_questions , # Check hybrid general questions
30+ ClassifyMain .short_videos , # Check short videos carousel
2831 ClassifyMain .twitter , # Check twitter cards and results
2932 ClassifyMain .general , # Check general components
3033 ClassifyMain .people_also_ask , # Check people also ask
@@ -114,6 +117,15 @@ def images(cmpt: bs4.element.Tag) -> str:
114117 ]
115118 return 'images' if any (conditions ) else "unknown"
116119
@staticmethod
def ai_overview(cmpt: bs4.element.Tag) -> str:
    """Classify AI Overview components.

    A component matches when it contains a ``div`` with class ``Fzsovc``,
    or when the first ``h2`` found inside it has the exact (whitespace
    stripped) text "AI Overview".

    Args:
        cmpt: The component tag to inspect.

    Returns:
        ``'knowledge'`` on a match (this classifier does not emit a label
        of its own), otherwise ``"unknown"``.
    """
    if cmpt.find("div", {"class": "Fzsovc"}):
        return 'knowledge'

    # Look the heading up once instead of searching the subtree twice.
    heading = cmpt.find("h2")
    if heading and heading.get_text(strip=True) == "AI Overview":
        return 'knowledge'

    return "unknown"
128+
117129 @staticmethod
118130 def knowledge_block (cmpt : bs4 .element .Tag ) -> str :
119131 """Classify knowledge block components"""
@@ -133,7 +145,7 @@ def knowledge_box(cmpt: bs4.element.Tag) -> str:
133145 bool (cmpt .find ("div" , {"jscontroller" : "Z2bSc" }))
134146 )
135147 condition ['maps' ] = webutils .check_dict_value (attrs , "data-hveid" , "CAMQAA" )
136- condition ['hotels ' ] = cmpt .find ("div" , {"class" : "zd2Jbb" })
148+ condition ['locations ' ] = cmpt .find ("div" , {"class" : "zd2Jbb" })
137149 condition ['events' ] = cmpt .find ("g-card" , {"class" : "URhAHe" })
138150 condition ['jobs' ] = cmpt .find ("g-card" , {"class" : "cvoI5e" })
139151 text_list = list (cmpt .stripped_strings )
@@ -176,6 +188,24 @@ def people_also_ask(cmpt: bs4.element.Tag) -> str:
176188 conditions = webutils .check_dict_value (cmpt .attrs , "class" , class_list )
177189 return 'people_also_ask' if conditions else "unknown"
178190
@staticmethod
def short_videos(cmpt: bs4.element.Tag) -> str:
    """Classify the short videos carousel.

    Args:
        cmpt: The component tag to inspect.

    Returns:
        ``'short_videos'`` when the component contains a ``span`` with
        ``role="heading"`` and class ``IFnjPb`` whose stripped text is
        exactly "Short videos"; otherwise ``"unknown"``.
    """
    title = cmpt.find('span', {'role': 'heading', 'class': 'IFnjPb'})
    # Short-circuits when no heading span was found.
    is_carousel = bool(title) and title.get_text(strip=True) == 'Short videos'
    return 'short_videos' if is_carousel else "unknown"
198+
@staticmethod
def locations(cmpt: bs4.element.Tag) -> str:
    """Classify locations components (hotels, etc.).

    Args:
        cmpt: The component tag to inspect.

    Returns:
        ``'locations'`` when the first element carrying ``role="heading"``
        has stripped text starting with "Hotels" or "More Hotels";
        otherwise ``"unknown"``.
    """
    heading = cmpt.find(attrs={'role': 'heading'})
    if not heading:
        return "unknown"

    text = heading.get_text(strip=True)
    # str.startswith accepts a tuple of prefixes, so one call covers both.
    if text.startswith(('Hotels', 'More Hotels')):
        return 'locations'
    return "unknown"
208+
179209 @staticmethod
180210 def top_stories (cmpt : bs4 .element .Tag ) -> str :
181211 """Classify top stories components"""
0 commit comments