使用java将网页保存为mht格式(2)计算机二级考试

文章作者 100test 发表时间 2009-06-15 09:48:38
来源 100Test.Com百考试题网


  //设置网页正文
  MimeBodyPart bp = new MimeBodyPart().
  bp.setText(content, strEncoding).
  bp.addHeader("Content-Type", "text/html.charset=" strEncoding).
  bp.addHeader("Content-Location", strWeb.toString()).
  mp.addBodyPart(bp).
  int urlCount = urlScriptList.size().
  for (int i = 0. i <. urlCount. i ) {
  bp = new MimeBodyPart().
  ArrayList urlInfo = (ArrayList) urlScriptList.get(i).
  // String url = urlInfo.get(0).toString().
  String absoluteURL = urlInfo.get(1).toString().
  bp
  .addHeader("Content-Location",
  javax.mail.internet.MimeUtility
  .encodeWord(java.net.URLDecoder
  .decode(absoluteURL, strEncoding))).
  DataSource source = new AttachmentDataSource(absoluteURL, "text").
  bp.setDataHandler(new DataHandler(source)).
  mp.addBodyPart(bp).
  }
  urlCount = urlImageList.size().
  for (int i = 0. i <. urlCount. i ) {
  bp = new MimeBodyPart().
  ArrayList urlInfo = (ArrayList) urlImageList.get(i).
  // String url = urlInfo.get(0).toString().
  String absoluteURL = urlInfo.get(1).toString().
  bp
  .addHeader("Content-Location",
  javax.mail.internet.MimeUtility
  .encodeWord(java.net.URLDecoder
  .decode(absoluteURL, strEncoding))).
  DataSource source = new AttachmentDataSource(absoluteURL, "image").
  bp.setDataHandler(new DataHandler(source)).
  mp.addBodyPart(bp).
  }
  msg.setContent(mp).
  // write the mime multi part message to a file
  msg.writeTo(new FileOutputStream(strFileName)).
  }
  /**
  *方法说明:mht转html
  *输入参数:strMht mht文件路径. strHtml html文件路径
  *返回类型:
  */
  public static void mht2html(String strMht, String strHtml) {
  try {
  //TODO readEmlFile
  InputStream fis = new FileInputStream(strMht).
  Session mailSession = Session.getDefaultInstance(System.getProperties(), null).
  MimeMessage msg = new MimeMessage(mailSession, fis).
  Object content = msg.getContent().
  if (content instanceof Multipart) {
  MimeMultipart mp = (MimeMultipart)content.
  MimeBodyPart bp1 = (MimeBodyPart)mp.getBodyPart(0).
  String strEncodng = getEncoding(bp1).
  String strText = getHtmlText(bp1, strEncodng).
  if (strText == null)
  return.
  File parent = null.
  if (mp.getCount() >. 1) {
  parent = new File(new File(strHtml).getAbsolutePath() ".files").
  parent.mkdirs().
  if (!parent.exists())
  return.
  }
  for (int i = 1. i <. mp.getCount(). i) {
  MimeBodyPart bp = (MimeBodyPart)mp.getBodyPart(i).
  String strUrl = getResourcesUrl(bp).
  if (strUrl == null)
  continue.
  DataHandler dataHandler = bp.getDataHandler().
  MimePartDataSource source = (MimePartDataSource)dataHandler.getDataSource().
  File resources = new File(parent.getAbsolutePath() File.separator getName(strUrl, i)).
  if (saveResourcesFile(resources, bp.getInputStream()))
  strText = JHtmlClear.replace(strText, strUrl, resources.getAbsolutePath()).
  }
  saveHtml(strText, strHtml).
  }
  } catch (Exception e) {
  // TODO Auto-generated catch block
  e.printStackTrace().
  }
  }
  /**
  *方法说明:得到资源文件的name
  *输入参数:strName 资源文件链接, ID 资源文件的序号
  *返回类型:资源文件的本地临时文件名
  */
  public static String getName(String strName, int ID) {
  char separator = ’/’.
  System.out.println(strName).
  System.out.println(separator).
  if( strName.lastIndexOf(separator) >.= 0)
  return format(strName.substring(strName.lastIndexOf(separator) 1)).
  return "temp" ID.
  }
  /**
  *方法说明:得到网页编码
  *输入参数:bp MimeBodyPart类型的网页内容
  *返回类型:MimeBodyPart里的网页内容的编码
  */
  private static String getEncoding(MimeBodyPart bp) {
  if (bp != null) {
  try {
  Enumeration list = bp.getAllHeaders().
  while (list.hasMoreElements()) {
  javax.mail.Header head = (javax.mail.Header)list.nextElement().
  if (head.getName().compareTo("Content-Type") == 0) {
  String strType = head.getValue().
  int pos = strType.indexOf("charset=").
  if (pos != -1) {
  String strEncoding = strType.substring(pos 8, strType.length()).
  if (strEncoding.toLowerCase().compareTo("gb2312") == 0) {
  strEncoding = "gbk".
  }
  return strEncoding.
  }
  }
  }
  } catch (MessagingException e) {
  // TODO Auto-generated catch block
  e.printStackTrace().
  }
  }
  return null.
  }
  /**
  *方法说明:得到资源文件url
  *输入参数:bp MimeBodyPart类型的网页内容
  *返回类型:资源文件url
  */

相关文章


可能导致Java内存泄漏的几项原因计算机二级考试
使用java将网页保存为mht格式(2)计算机二级考试
使用java将网页保存为mht格式(1)计算机二级考试
澳大利亚华人论坛
考好网
日本华人论坛
华人移民留学论坛
英国华人论坛