{"id":9093,"date":"2022-12-16T18:17:41","date_gmt":"2022-12-16T10:17:41","guid":{"rendered":"http:\/\/123.57.164.21\/?p=9093"},"modified":"2023-01-19T08:37:57","modified_gmt":"2023-01-19T00:37:57","slug":"java%e5%ae%9e%e7%8e%b0%e4%bb%8ehtml%e6%96%87%e6%9c%ac%e4%b8%ad%e6%8f%90%e5%8f%96%e7%ba%af%e6%96%87%e6%9c%ac%e7%9a%84%e6%96%b9%e6%b3%95","status":"publish","type":"post","link":"https:\/\/92it.top\/?p=9093","title":{"rendered":"Java\u5b9e\u73b0\u4eceHtml\u6587\u672c\u4e2d\u63d0\u53d6\u7eaf\u6587\u672c\u7684\u65b9\u6cd5"},"content":{"rendered":"\n<p><strong>1\u3001\u5e94\u7528\u573a\u666f\uff1a<\/strong>\u4ece\u4e00\u4efdhtml\u6587\u4ef6\u4e2d\u6216\u4eceString\uff08\u662fhtml\u5185\u5bb9\uff09\u4e2d\u63d0\u53d6\u7eaf\u6587\u672c\uff0c\u53bb\u6389\u7f51\u9875\u6807\u7b7e\uff1b<\/p>\n\n\n\n<p><strong>2\u3001\u4ee3\u7801\u4e00\uff1a<\/strong>replaceAll\u641e\u5b9a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">\/\/\u4ecehtml\u4e2d\u63d0\u53d6\u7eaf\u6587\u672c \npublic static String StripHT(String strHtml) { \nString txtcontent = strHtml.replaceAll(\"&lt;\/?[^>]+>\", \"\"); \/\/\u5254\u51fa&lt;html>\u7684\u6807\u7b7e \ntxtcontent = txtcontent.replaceAll(\"&lt;a>\\\\s*|\\t|\\r|\\n&lt;\/a>\", \"\");\/\/\u53bb\u9664\u5b57\u7b26\u4e32\u4e2d\u7684\u7a7a\u683c,\u56de\u8f66,\u6362\u884c\u7b26,\u5236\u8868\u7b26 \nreturn txtcontent; \n} <\/pre>\n\n\n\n<p><strong>3\u3001\u4ee3\u7801\u4e8c\uff1a<\/strong>\u6b63\u5219\u8868\u8fbe\u5f0f\u641e\u5b9a<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">\/\/\u4ecehtml\u4e2d\u63d0\u53d6\u7eaf\u6587\u672c\n\tpublic static String Html2Text(String inputString) {\n\t\tString htmlStr = inputString; \/\/ \u542bhtml\u6807\u7b7e\u7684\u5b57\u7b26\u4e32\n\t\tString textStr = \"\";\n\t\tjava.util.regex.Pattern p_script;\n\t\tjava.util.regex.Matcher m_script;\n\t\tjava.util.regex.Pattern p_style;\n\t\tjava.util.regex.Matcher m_style;\n\t\tjava.util.regex.Pattern p_html;\n\t\tjava.util.regex.Matcher m_html;\n\t\ttry {\n\t\t\tString regEx_script = \"&lt;[\\\\s]*?script[^>]*?>[\\\\s\\\\S]*?&lt;[\\\\s]*?\\\\\/[\\\\s]*?script[\\\\s]*?>\"; \/\/ \u5b9a\u4e49script\u7684\u6b63\u5219\u8868\u8fbe\u5f0f{\u6216&lt;script[^>]*?>[\\\\s\\\\S]*?&lt;\\\\\/script>\n\t  String regEx_style = \"&lt;[\\\\s]*?style[^>]*?>[\\\\s\\\\S]*?&lt;[\\\\s]*?\\\\\/[\\\\s]*?style[\\\\s]*?>\"; \/\/ \u5b9a\u4e49style\u7684\u6b63\u5219\u8868\u8fbe\u5f0f{\u6216&lt;style[^>]*?>[\\\\s\\\\S]*?&lt;\\\\\/style>\n\t  String regEx_html = \"&lt;[^>]+>\"; \/\/ \u5b9a\u4e49HTML\u6807\u7b7e\u7684\u6b63\u5219\u8868\u8fbe\u5f0f\n\t  p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);\n\t  m_script = p_script.matcher(htmlStr);\n\t  htmlStr = m_script.replaceAll(\"\"); \/\/ \u8fc7\u6ee4script\u6807\u7b7e\n\t  p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);\n\t  m_style = p_style.matcher(htmlStr);\n\t  htmlStr = m_style.replaceAll(\"\"); \/\/ \u8fc7\u6ee4style\u6807\u7b7e\n\t  p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);\n\t  m_html = p_html.matcher(htmlStr);\n\t  htmlStr = m_html.replaceAll(\"\"); \/\/ \u8fc7\u6ee4html\u6807\u7b7e\n\t  textStr = htmlStr;\n\t } catch (Exception e) {System.err.println(\"Html2Text: \" + e.getMessage()); }\n\t\t\/\/\u5254\u9664\u7a7a\u683c\u884c\n\t\ttextStr=textStr.replaceAll(\"[ ]+\", \" \");\n\t\ttextStr=textStr.replaceAll(\"(?m)^\\\\s*$(\\\\n|\\\\r\\\\n)\", \"\");\n\t\treturn textStr;\/\/ \u8fd4\u56de\u6587\u672c\u5b57\u7b26\u4e32\n\t}<\/pre>\n\n\n\n<p><strong>4\u3001\u4ee3\u7801\u4e09\uff1a<\/strong>HTMLEditorKit.ParserCallback\u641e\u5b9a\uff0cJava\u81ea\u5e26\u7684\u7c7b<\/p>\n\n\n\n<pre class=\"EnlighterJSRAW\" data-enlighter-language=\"generic\" data-enlighter-theme=\"\" data-enlighter-highlight=\"\" data-enlighter-linenumbers=\"\" data-enlighter-lineoffset=\"\" data-enlighter-title=\"\" data-enlighter-group=\"\">package com.jtb.nucleus.integration.utils;\n\nimport java.io.*;\nimport javax.swing.text.html.*;\nimport javax.swing.text.html.parser.*;\n\npublic class Html2Text extends HTMLEditorKit.ParserCallback {\n\tStringBuffer s;\n\n\tpublic Html2Text() {\n\t}\n\n\tpublic void parse(Reader in) throws IOException {\n\t\ts = new StringBuffer();\n\t\tParserDelegator delegator = new ParserDelegator();\n\t\t\/\/ the third parameter is TRUE to ignore charset directive\n\t\tdelegator.parse(in, this, Boolean.TRUE);\n\t}\n\n\tpublic void handleText(char[] text, int pos) {\n\t\ts.append(text);\n\t}\n\n\tpublic String getText() {\n\t\treturn s.toString();\n\t}\n\n\tpublic static void main(String[] args) {\n\t\ttry {\n\t\t\t\/\/ the HTML to convert\n\t\t\t\/\/ Reader in=new StringReader(\"string\");\n\t\t\tFileReader in = new FileReader(\"java-new.html\");\n\t\t\tHtml2Text parser = new Html2Text();\n\t\t\tparser.parse(in);\n\t\t\tin.close();\n\t\t\tSystem.out.println(parser.getText());\n\t\t} catch (Exception e) {\n\t\t\te.printStackTrace();\n\t\t}\n\t}\n}<\/pre>\n","protected":false},"excerpt":{"rendered":"<p>1\u3001\u5e94\u7528\u573a\u666f\uff1a\u4ece\u4e00\u4efdhtml\u6587\u4ef6\u4e2d\u6216\u4eceString\uff08\u662fhtml\u5185\u5bb9\uff09\u4e2d\u63d0\u53d6\u7eaf\u6587\u672c\uff0c\u53bb\u6389\u7f51\u9875\u6807\u7b7e\uff1b 2\u3001\u4ee3\u7801\u4e00\uff1a [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"closed","ping_status":"","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[26,27],"tags":[],"_links":{"self":[{"href":"https:\/\/92it.top\/index.php?rest_route=\/wp\/v2\/posts\/9093"}],"collection":[{"href":"https:\/\/92it.top\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/92it.top\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/92it.top\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/92it.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=9093"}],"version-history":[{"count":2,"href":"https:\/\/92it.top\/index.php?rest_route=\/wp\/v2\/posts\/9093\/revisions"}],"predecessor-version":[{"id":9167,"href":"https:\/\/92it.top\/index.php?rest_route=\/wp\/v2\/posts\/9093\/revisions\/9167"}],"wp:attachment":[{"href":"https:\/\/92it.top\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=9093"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/92it.top\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=9093"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/92it.top\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=9093"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}