2021-03-08
爬虫代理ip如何设置api?
发现很多朋友不知道如何使用从API端口提取的IP代理模式,不知道流程。在这里,以爬虫为例,我们将向您展示如何使用爬虫IP代理。感兴趣的朋友可以来了解下啦~下面就是从中选择一个最合适的ip。实现思路:第一步、获取该页面内容,提取每一行ip信息。第二步、新建一个java Bean 封装每一个ip属性。第三步、将所有的javaBean添加到一个arraylist中。第四步、对整个arraylist排序。代码如下:/* * 下载时事可用的网络爬虫代理 */public class CrawlProxyIp5Net {public static ArrayList<ProxyConfigBean> getProxyConfigs(){ArrayList<ProxyConfigBean> list = new ArrayList<ProxyConfigBean>();try {WebClient client = new WebClient(BrowserVersion.CHROME);client.getOptions().setJavaScriptEnabled(false);client.getOptions().setCssEnabled(false);HtmlPage page = client.getPage("http://pachong.org/");HtmlTableBody tableBody = (HtmlTableBody) page.getByXPath("//table[@class='tb']/tbody").get(0);List<HtmlTableRow> tableRows = tableBody.getRows();if(tableRows!=null){for(int i=0;i<tableRows.size();i++){ProxyConfigBean bean = new ProxyConfigBean();HtmlTableRow tableRow = tableRows.get(i);HtmlTableCell ipCell = tableRow.getCell(1);HtmlTableCell portCell = tableRow.getCell(2);HtmlTableCell countryCell = tableRow.getCell(3);HtmlTableCell typeCell = tableRow.getCell(4);HtmlTableCell statuCell = tableRow.getCell(5);String ip = MyStringUtils.pureString(ipCell.asText());bean.setIp(ip);String portValue = MyStringUtils.pureString(portCell.asText());if(portValue!=null && !portValue.matches("^[0-9]")){int port = Integer.parseInt(portValue);bean.setPort(port);}String country = MyStringUtils.pureString(countryCell.asText());bean.setCountry(country);String type = MyStringUtils.pureString(typeCell.asText());bean.setType(type);String statu = MyStringUtils.pureString(statuCell.asText());bean.setStatu(statu);//最后设置优先级,在设置优先级之前,必须设置好其他属性的值bean.setPriority();list.add(bean);}}client.closeAllWindows();Collections.sort(list,new Comparator<ProxyConfigBean>() {@Overridepublic int compare(ProxyConfigBean bean1, ProxyConfigBean bean2) {// TODO Auto-generated method stubint scores1 = bean1.getPriority();int scores2 = bean2.getPriority();return scores2-scores1;}});} catch (FailingHttpStatusCodeException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (MalformedURLException e) {// TODO Auto-generated catch blocke.printStackTrace();} catch (IOException e) {// TODO Auto-generated catch blocke.printStackTrace();}return list;}public static ProxyConfig getProxyConfig(){ArrayList<ProxyConfigBean> list = getProxyConfigs();if(list!=null && list.size()>0){ProxyConfigBean bean = list.get(0);ProxyConfig proxyConfig = new ProxyConfig();proxyConfig.setProxyHost(bean.getIp());proxyConfig.setProxyPort(bean.getPort());return proxyConfig;}else{return null;}}public static void main(String[] args) {ArrayList<ProxyConfigBean> list = getProxyConfigs();for(int i=0;i<list.size();i++){ProxyConfigBean bean = list.get(i);System.out.println(bean.getCountry() +" "+bean.getPort()+ " "+bean.getIp());}System.out.println("done.......");}}其中JAVABean代码:public class ProxyConfigBean {private String ip;private int port;private String country;//中国,其他国家private String type;//匿名 程度high、anonymous、elite、transparentprivate String statu;//空闲、繁忙、较忙private int priority;//优先级public String getIp() {return ip;}public void setIp(String ip) {this.ip = ip;}public int getPort() {return port;}public void setPort(int port) {this.port = port;}public String getCountry() {return country;}public void setCountry(String country) {this.country = country;}public String getType() {return type;}public void setType(String type) {this.type = type;}public String getStatu() {return statu;}public void setStatu(String statu) {this.statu = statu;}public int getPriority() {return priority;}public void setPriority() {//根据国家设置优先级if(this.country.contains("中国")){this.priority +=50;}//根据状态设置优先级,空闲(10)、繁忙(0)、较忙(5)if(this.statu.contains("空闲")){this.priority +=20;}else if(this.statu.contains("较忙")){this.priority +=10;}//根据类型设置优先级匿名 程度high(4)、anonymous(5)、elite(2)、transparent(3)if(this.type.contains("anonymous")){this.priority +=5;}else if(this.type.contains("high")){this.priority +=4;}else if(this.type.contains("transparent")){this.priority +=3;}else if(this.type.contains("elite")){this.priority +=2;}//根据端口,80端口最优if(this.port == 80){this.priority +=1;}}}大家如果想设置爬虫代理ip中的api,不妨可以试试小编的这个流程哦~希望可以帮助大家!