diff --git a/crawl4ai/html2text/__init__.py b/crawl4ai/html2text/__init__.py
index ca15b4534..7f6e788b3 100644
--- a/crawl4ai/html2text/__init__.py
+++ b/crawl4ai/html2text/__init__.py
@@ -316,6 +316,12 @@ def handle_tag(
if self.tag_callback(self, tag, attrs, start) is True:
return
+ # Handle <base> tag to update base URL for relative links
+ if tag == "base" and start:
+ href = attrs.get("href")
+ if href:
+ self.baseurl = href
+
# first thing inside the anchor tag is another tag
# that produces some output
if (
@@ -1069,6 +1075,15 @@ def update_params(self, **kwargs):
setattr(self, key, value)
def handle_tag(self, tag, attrs, start):
+ # Handle <base> tag to update base URL for relative links
+ # Must be handled before preserved tags since <base> is in <head>
+ if tag == "base" and start:
+ href = attrs.get("href") if attrs else None
+ if href:
+ self.baseurl = href
+ # Also let parent class handle it
+ return super().handle_tag(tag, attrs, start)
+
# Handle preserved tags
if tag in self.preserve_tags:
if start: