inputSchema
{
"type": "object",
"required": [
"raw_html"
],
"properties": {
"raw_html": {
"type": "string",
"description": "Full HTML of a public article page (e.g. from mp.weixin.qq.com), or an HTML fragment of such a page."
},
"fetched_at": {
"type": "string",
"description": "ISO-8601 timestamp of when the host fetched the page (optional)."
},
"source_url": {
"type": "string",
"description": "Canonical URL for traceability (optional)."
},
"fallback_title": {
"type": "string",
"description": "Title to use when one cannot be extracted from the HTML (optional)."
}
}
}
outputSchema
{
"type": "object",
"required": [
"title",
"published_at",
"author_display",
"is_original",
"tags",
"body_text",
"summary_one_line",
"warnings"
],
"properties": {
"tags": {
"type": "array",
"items": {
"type": "string"
}
},
"title": {
"type": "string"
},
"warnings": {
"type": "array",
"items": {
"type": "string"
}
},
"body_text": {
"type": "string",
"description": "Plain text body, whitespace normalized."
},
"is_original": {
"type": "boolean",
"description": "Heuristic flag inferred from page markers (e.g. 原创, meaning 'original')."
},
"published_at": {
"type": "string",
"description": "Publication time as an ISO-8601 string when it could be parsed; empty string if unknown."
},
"author_display": {
"type": "string"
},
"summary_one_line": {
"type": "string"
}
}
}