Sunday, 17 October 2021

Java: How to read the content of a webpage or url?

In this post, I am going to show multiple ways to read the content of a webpage or URL in Java.

 

Approach 1: Using built-in HttpURLConnection class.

 

ReadDataFromUrlDemo1.java

package com.sample.app;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;

/**
 * Demonstrates reading the content of a URL with the JDK's built-in
 * {@link HttpURLConnection}.
 */
public class ReadDataFromUrlDemo1 {

	/** Size, in chars, of the buffer used to drain the response body. */
	private static final int BUFFER_SIZE = 8192;

	/**
	 * Issues an HTTP GET request and returns the response body as text.
	 *
	 * <p>
	 * The body is decoded as UTF-8 and returned as received, including its line
	 * terminators.
	 *
	 * @param urlToRead absolute URL to fetch
	 * @return the decoded response body
	 * @throws Exception if the URL is malformed or the request fails
	 */
	public static String getContentFromUrl(String urlToRead) throws Exception {

		URL url = new URL(urlToRead);
		HttpURLConnection httpUrlConnection = (HttpURLConnection) url.openConnection();
		httpUrlConnection.setRequestMethod("GET");

		StringBuilder result = new StringBuilder();

		// Decode explicitly as UTF-8: the single-argument InputStreamReader
		// constructor falls back to the platform default charset.
		try (InputStream inputStream = httpUrlConnection.getInputStream();
				InputStreamReader inputStreamReader = new InputStreamReader(inputStream,
						java.nio.charset.StandardCharsets.UTF_8);
				BufferedReader reader = new BufferedReader(inputStreamReader)) {

			// Copy raw chars instead of using readLine(): readLine() strips the
			// line terminators, which would glue neighbouring lines together.
			char[] buffer = new char[BUFFER_SIZE];
			for (int count; (count = reader.read(buffer)) != -1;) {
				result.append(buffer, 0, count);
			}

		}
		return result.toString();
	}

	/**
	 * Fetches a sample page and prints its content to standard output.
	 *
	 * @param args unused
	 * @throws Exception if the page cannot be fetched
	 */
	public static void main(String[] args) throws Exception {
		String urlToRead = "https://self-learning-java-tutorial.blogspot.com/2014/02/blog-post.html";
		String result = getContentFromUrl(urlToRead);
		System.out.println(result);
	}

}

 

Approach 2: Using Spring RestTemplate backed by Apache HttpClient. One of the key advantages of an HTTP client library over HttpURLConnection is that it can handle URL redirects, proxy negotiation, etc.

 

ReadDataFromUrlDemo2.java

package com.sample.app;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.springframework.http.HttpMethod;
import org.springframework.http.ResponseEntity;
import org.springframework.http.client.ClientHttpRequestFactory;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.web.client.RestTemplate;

public class ReadDataFromUrlDemo2 {

	private static final RestTemplate REST_TEMPLATE = new RestTemplate(getClientHttpRequestFactory());

	private static ClientHttpRequestFactory getClientHttpRequestFactory() {
		int timeout = 5000;
		RequestConfig config = RequestConfig.custom().setConnectTimeout(timeout).setConnectionRequestTimeout(timeout)
				.setSocketTimeout(timeout).build();
		CloseableHttpClient client = HttpClientBuilder.create().setDefaultRequestConfig(config).build();
		return new HttpComponentsClientHttpRequestFactory(client);
	}

	public static String getContentFromUrl(String urlToRead) throws Exception {
		ResponseEntity<String> responseEntity = REST_TEMPLATE.exchange(urlToRead, HttpMethod.GET, null, String.class);
		return responseEntity.getBody();

	}

	public static void main(String args[]) throws Exception {
		String urlToRead = "https://self-learning-java-tutorial.blogspot.com/2018/08/spring-framework.html";
		String result = getContentFromUrl(urlToRead);
		System.out.println(result);
	}
}

Dependencies used

<!-- Third-party libraries the RestTemplate example above is compiled against. -->
<dependencies>
	<!-- Apache HttpClient 4.x: source of the org.apache.http.* imports. -->
	<dependency>
		<groupId>org.apache.httpcomponents</groupId>
		<artifactId>httpclient</artifactId>
		<version>4.5.10</version>
	</dependency>

	<!-- Spring Web: source of RestTemplate and the other org.springframework.* imports. -->
	<dependency>
		<groupId>org.springframework</groupId>
		<artifactId>spring-web</artifactId>
		<version>5.3.11</version>
	</dependency>

</dependencies>

Approach 3: Using Apache HttpClient directly.

 

ReadDataFromUrlDemo3.java

package com.sample.app;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

/**
 * Demonstrates reading the content of a URL with Apache HttpClient.
 */
public class ReadDataFromUrlDemo3 {

	/** Size, in chars, of the buffer used to drain the response body. */
	private static final int BUFFER_SIZE = 8192;

	/**
	 * Executes an HTTP GET request and returns the response body as text.
	 *
	 * <p>
	 * The body is decoded as UTF-8 and returned as received, including its line
	 * terminators. The client and the response are both closed before this
	 * method returns.
	 *
	 * @param urlToRead URL to fetch
	 * @return the decoded response body, or an empty string when the response
	 *         carries no entity
	 * @throws Exception if the URL is invalid or the request fails
	 */
	public static String getContentFromUrl(String urlToRead) throws Exception {
		HttpGet httpget = new HttpGet(urlToRead);

		// Both the client and the response hold connection resources, so they
		// are managed by try-with-resources and released on every exit path.
		try (CloseableHttpClient httpclient = HttpClients.createDefault();
				CloseableHttpResponse response = httpclient.execute(httpget)) {

			HttpEntity entity = response.getEntity();
			if (entity == null) {
				// The response has no body to read.
				return "";
			}

			StringBuilder result = new StringBuilder();

			// Decode explicitly as UTF-8: the single-argument InputStreamReader
			// constructor falls back to the platform default charset.
			try (InputStream inputStream = entity.getContent();
					InputStreamReader inputStreamReader = new InputStreamReader(inputStream,
							java.nio.charset.StandardCharsets.UTF_8);
					BufferedReader reader = new BufferedReader(inputStreamReader)) {

				// Copy raw chars instead of using readLine(): readLine() strips the
				// line terminators, which would glue neighbouring lines together.
				char[] buffer = new char[BUFFER_SIZE];
				for (int count; (count = reader.read(buffer)) != -1;) {
					result.append(buffer, 0, count);
				}

			}
			return result.toString();
		}

	}

	/**
	 * Fetches a sample page and prints its content to standard output.
	 *
	 * @param args unused
	 * @throws Exception if the page cannot be fetched
	 */
	public static void main(String[] args) throws Exception {
		String urlToRead = "https://self-learning-java-tutorial.blogspot.com/2018/08/spring-framework.html";
		String result = getContentFromUrl(urlToRead);
		System.out.println(result);
	}
}



  

Previous                                                    Next                                                    Home

No comments:

Post a Comment