1.VS2019工程属性(C/C++ → 常规 → 附加包含目录):
$(ProjectDir);$(ProjectDir)libxml2-2.10.3;$(ProjectDir)libxml2-2.10.3\include;$(ProjectDir)libxml2-2.10.3\vcxproject;
2.工程添加项目文件:
3.使用代码案例:
#include <algorithm>
#include <cstdio>
#include <memory>
#include <string>
#include <unordered_map>

#include <libxml/HTMLparser.h>
#include <libxml.h>
#include <libxml/xpath.h>
// Evaluates the XPath expression `xpath` against `doc`.
//
// Returns the evaluation result when its node set is non-empty; the caller is
// then responsible for releasing it with xmlXPathFreeObject(). Returns NULL
// (after printing a diagnostic to stdout) when the XPath context cannot be
// created, when evaluation fails, or when the resulting node set is empty.
static xmlXPathObjectPtr getNodeset(xmlDocPtr doc, const xmlChar* xpath)
{
    xmlXPathContextPtr ctx = xmlXPathNewContext(doc);
    if (!ctx) {
        printf("context is NULL\n");
        return NULL;
    }

    // The context is only needed for the evaluation itself, so it is released
    // immediately afterwards regardless of the outcome.
    xmlXPathObjectPtr evaluated = xmlXPathEvalExpression(xpath, ctx);
    xmlXPathFreeContext(ctx);

    if (!evaluated) {
        printf("xmlXPathEvalExpression return NULL\n");
        return NULL;
    }

    if (!xmlXPathNodeSetIsEmpty(evaluated->nodesetval)) {
        return evaluated;
    }

    // Nothing matched: free the result here so callers only ever receive
    // objects that contain at least one node.
    xmlXPathFreeObject(evaluated);
    printf("nodeset is empty\n");
    return NULL;
}
static void ParseHtml2(const std::wstring& fileName, const std::wstring& appId)
{
FILE* fp = _wfopen(fileName.c_str(), L"r");
fseek(fp, 0, SEEK_END);
std::string data(ftell(fp), '\0');
fseek(fp, 0, SEEK_SET);
fread(data.data(), data.size(), 1, fp);
fclose(fp);
for (auto it = data.begin(); it != data.end(); )
{
if (*it == '\n')
{
it = data.erase(it);
}
else
{
it++;
}
}
data = ANSIToUTF8(data.c_str()).c_str();
htmlDocPtr hDocPtr = htmlParseDoc(BAD_CAST(data.c_str()), NULL);
if (hDocPtr != NULL)
{
std::unordered_map<std::wstring, std::wstring> mapAttrsAD = {};
std::unordered_map<std::wstring, std::wstring> mapAttrsURL = {};
{
std::string xpath = ("//li[@class='search-tags-item t-ad']");
xmlXPathObjectPtr app_result = getNodeset(hDocPtr, BAD_CAST(xpath.c_str()));
if (app_result != NULL)
{
int i = 0;
xmlChar* value = NULL;
if (app_result)
{
xmlNodeSetPtr nodeset = app_result->nodesetval;
xmlNodePtr cur = NULL;
for (i = 0; i < nodeset->nodeNr; i++)
{
cur = nodeset->nodeTab[i];
if (cur != NULL)
{
xmlAttr* pXmlAttr = cur->properties;
while (pXmlAttr != NULL)
{
std::wstring name = (pXmlAttr->name == NULL) ? L"" : UTF8ToWIDE((const char*)pXmlAttr->name);
std::wstring value = (xmlGetProp(cur, pXmlAttr->name) == NULL) ? L"" : UTF8ToWIDE((const char*)xmlGetProp(cur, pXmlAttr->name));
mapAttrsAD.insert(std::unordered_map<std::wstring, std::wstring>::value_type(name, value));
pXmlAttr = pXmlAttr->next;
}
break;
}
}
}
}
}
std::string xpath = ("//a[@data-hottag='download.detail.normal." + std::string(WIDEToUTF8(appId).c_str()) + "']");
xmlXPathObjectPtr app_result = getNodeset(hDocPtr, BAD_CAST(xpath.c_str()));
if (app_result == NULL)
{
printf("app_result is NULL\n");
return;
}
int i = 0;
xmlChar* value;
if (app_result)
{
xmlNodeSetPtr nodeset = app_result->nodesetval;
xmlNodePtr cur;
for (i = 0; i < nodeset->nodeNr; i++)
{
cur = nodeset->nodeTab[i];
if (cur != NULL)
{
xmlAttr* pXmlAttr = cur->properties;
while (pXmlAttr != NULL)
{
std::wstring name = (pXmlAttr->name == NULL) ? L"" : UTF8ToWIDE((const char*)pXmlAttr->name);
std::wstring value = (xmlGetProp(cur, pXmlAttr->name) == NULL) ? L"" : UTF8ToWIDE((const char*)xmlGetProp(cur, pXmlAttr->name));
mapAttrsURL.insert(std::unordered_map<std::wstring, std::wstring>::value_type(name, value));
pXmlAttr = pXmlAttr->next;
}
}
cur = cur->xmlChildrenNode;
while (cur != NULL)
{
if (!xmlStrcmp(cur->name, (const xmlChar*)"name"))
printf("%s\n", ((char*)XML_GET_CONTENT(cur->xmlChildrenNode)));
else if (!xmlStrcmp(cur->name, (const xmlChar*)"url"))
printf("%s\n", ((char*)XML_GET_CONTENT(cur->xmlChildrenNode)));
cur = cur->next;
}
}
xmlXPathFreeObject(app_result);
}
}
}