1、.NET Core 3.1、C#、Selenium
//爬取所有省份、城市、区县
public override async Task WriteAreaToFileAsync(string configPath, string directory)
{
const string BaseProvinceLink = "https://www.qcc.com/search?key={keyword}#industrycode:K&";
const string BaseCityLink = "https://www.qcc.com/search_getCityListHtml?province={0}";
const string BaseCountyLink = "https://www.qcc.com/search_getCountyListHtml?city={0}";
List<string> provinces = await GetCodeAsync(new Uri(BaseProvinceLink), ".sfilter-tag.clearfix.provinceChoose dd a");
const string baseText = "province:{0}&city:{1}&county:{2}&";
List<string> list = new List<string>();
foreach (var province in provinces)
{
StringBuilder.Clear();
Uri provinceUri = new Uri(StringBuilder.AppendFormat(BaseCityLink, province).ToString());
var cities = await GetCodeAsync(provinceUri, "dd a");
foreach (var city in cities)
{
StringBuilder.Clear();
Uri cityUri = new Uri(StringBuilder.AppendFormat(BaseCountyLink, city).ToString());
var counties = await GetCodeAsync(cityUri, "dd a");
foreach (var county in counties)
{
StringBuilder.Clear();
StringBuilder.Append(BaseProvinceLink);
string area = StringBuilder.AppendFormat(baseText, province, city, county).
Replace("search", "search_index").Replace("中介#", "中介&ajaxflag=1&")
.Replace(":industrycode", "=industrycode").ToString();
list.Add(area);
}
}
}
await File.WriteAllLinesAsync("企查查.txt", list);
}
//分页爬取企业信息
private async Task<bool> GetAgentsAsync(Uri cityUri)
{
LogHelper.Info(cityUri.ToString());
var pageSource = await HttpClient.GetStringAsync(cityUri);
while (!pageSource.Contains("查企业"))
{
if (pageSource.StartsWith("<script>window.location"))
{
VertifyCode(new Uri(pageSource.Split("'")[1]));
pageSource = await HttpClient.GetStringAsync(cityUri);
}
else if (pageSource.Contains("小查还没找到数据"))
{
return false;
}
}
var block = JumonyParser.Parse(pageSource).Find(".m_srchList tbody tr td:nth-child(3)");
foreach (var item in block)
{
await VertifyAsync(item.InnerHtml());
}
if (block.Count() < PageSize)
{
return false;
}
return true;
}
2、结果截图
3、需要开通vip账号
4、过滑动验证码