word域代码转换html丢失解决办法.docx

上传人:scccc 文档编号:12021964 上传时间:2021-12-01 格式:DOCX 页数:13 大小:38.98KB
返回 下载 相关 举报
word域代码转换html丢失解决办法.docx_第1页
第1页 / 共13页
word域代码转换html丢失解决办法.docx_第2页
第2页 / 共13页
word域代码转换html丢失解决办法.docx_第3页
第3页 / 共13页
word域代码转换html丢失解决办法.docx_第4页
第4页 / 共13页
word域代码转换html丢失解决办法.docx_第5页
第5页 / 共13页
点击查看更多>>
资源描述

《word域代码转换html丢失解决办法.docx》由会员分享,可在线阅读,更多相关《word域代码转换html丢失解决办法.docx(13页珍藏版)》请在三一文库上搜索。

1、Word 转 html 时存在域代码丢失的问题,Aspose、jacob、poi 都无法解决。在使用 jacob 转换成 html 时,域代码会被 <!--[if supportFields]> 和 <![endif]--> 包裹,可以统一提取出来转换成 latex,再把 latex 转换成图片,从而解决 word 域代码丢失问题。private void processFormula(List<Node>nodes ) throwsUnsupportedEncodingException for ( int i = nodes .size()-1; i >=0; i -) Node node

2、= nodes .get( i ); if ( node instanceofElement)Element e = (Element) node ; processFormula(e.childNodes(); else if ( node instanceof Comment) String commentText = node .toString(); if (commentText .contains( "<!-if supportFields>")Comment comment = (Comment) node ;String latex = EqFo

3、rmulaLatexUtil.getLatex ( comment );if (StringUtils. isNotBlank ( latex ) String latexTemp = newString(Base64. encodeBase64 ( latex .getBytes(); latex = URLEncoder. encode ( latex , "utf-8");latex = latex String url = node .after( ' data-latex='"+latexTemp +"'src='

4、;" /* */package com.iflytek.edu.tlsys.rawpaper2x.utils;import java.io.File;import java.io.IOException;import .URLEncoder;import java.util.ArrayList;.replace( "+" , "%20");LATEXURL +latex ;'<img encode='encode'+ url +"'/>");import java.util.Lis

5、t;import org.jsoup.Jsoup;import org.jsoup.nodes.Comment;import org.jsoup.nodes.Document;import org.jsoup.nodes.Element;import org.jsoup.select.Elements;import org.springframework.util.CollectionUtils;import mon.util.StringUtils;/* author feiwang8* 2016年8月24日下午3:45:05*/public class EqFormulaLatexUtil

6、 /* 左括号替代符号* /public static final String LEFTBRACKETRE =" < "/* 右括号替代符号* /public static final String RIGHTBRACKETRE ="'、"/* 左括号* /public static final String LEFTBRACKET ="("/* 右括号* /public static final String RIGHTBRACKET =")"/*双正则*/publicstaticfinalStr

7、ingREGEXPART_F="A(sS*)|F()+),(sS*)|F()+)$”;/* 单正则* /public static final String REGEXPART="(sS*)|F()+)”;/* 逗号替代符号*/public static final String COMMARE = " l、/*逗号*/public static final String COMMA =","/* 更加 comment 获取 latex* param comment html Comment* return latex* /public sta

8、tic String getLatex(Comment comment) return getLatex(comment.toString();/* 根据 comment 获取 latex* param comment html Comment* return latex* /public static String getLatex(String comment) String html = comment.replace("<!-if supportFields>","") .replace("<!endif->&q

9、uot;,"") .replace(char) 10 + "","") .replace(char) 13 + "","");Document doc = Jsoup.parse(html);Elements sups = doc.select("sup");Elements subs = doc.select("sub");/纠正上标for(Element sup:sups)String text = sup.text();sup.tagName(&qu

10、ot;span");sup.text("s("+text+",)");/纠正下标for(Element sub:subs)String text = sub.text();sub.tagName("span");sub.text("s( ,"+text+")");String eqtext = doc.body().text();eqtext = mergeSubSup(eqtext);/无法被StringUtil判断为空白的空白eqtext = eqtext.replace(char

11、) 8203 + "","");eqtext = eqtext.replace(char) 160 + "", " ").replace(" ","");eqtext = eqtext.trim();/ System.out.println(eqtext+">");if(eqtext.startsWith("eq")eqtext = eqtext.replaceFirst("eq", "")

12、.trim();eqtext=eqtext.replace(""+LEFTBRACKET,""+LEFTBRACKETRE).replace(""+RIGHTBRACKET,""+RIGHTBRACKETRE).replace(""+COMMA, ""+COMMARE);String latex = parserElements(eqtext);returnlatex.replace(""+LEFTBRACKETRE,""+LEFTBR

13、ACKET).replace(""+RIGHTBRACKETRE, ""+RIGHTBRACKET).replace(""+COMMARE, ""+COMMA); return "" private static String mergeSubSup(String latex)/ 这里需要合并上下标例如:eq isu(s(i, )s(= , )s(1, ),s( ,3),x)/->eq isu(s(i = 1, ),s( ,3),x)/TODO return latex; /* param

14、 eqtext* return* /private static String parserElements(String eqtext) String latex =""if(eqtext.contains("")&&eqtext.contains(LEFTBRACKET)&&eqtext.contains(RIGHTBR ACKET)String preText = eqtext.substring(0,eqtext.indexOf("");int startIndex = eqtext.index

15、Of("");int endIndex= getNextLeftBra(startIndex,'(',eqtext);int nextBra = getNextBraIndex(endIndex+1,eqtext);String name = eqtext.substring(startIndex,endIndex);String text = eqtext.substring(endIndex+1,nextBra);String suffText= eqtext.substring(nextBra+1,eqtext.length();latex = par

16、serElements(preText) + parserToLatex(name.trim(),text) +parserElements(suffText);elselatex = eqtext;return latex;private static int getNextLeftBra(int start,char sym,String eqtext) for(int i =start;i<eqtext.length();i+ )char leftbra = eqtext.charAt(i);if(leftbra = sym)return i;return eqtext.lengt

17、h();/* 获取配对括号的位置* param text 文本* return 位置*/private static int getNextBraIndex(int start,String text)int leftbra = 0;int rightbra = 0;for(int i = start-1 ; i<text.length();i+)char c = text.charAt(i);if(c = ')')leftbra +;if(c ='(')rightbra+;if(rightbra!=0&&leftbra=rightbra&

18、amp;&i>=start) return i;return 0;private static String parserToLatex(String name,String text)String latex =""name = name.toLowerCase();/分式if(name.equals("f")latex +=getFLatex(name, text);/根式else if(name.equals("r")latex +=getRLatex(name, text);/上下标else if(name.st

19、artsWith("s")latex+=getSLatex(name, text);/a矩阵al左对齐;ac居中;ar右对齐;con元素排成n歹U; vsn行间增加 n磅;hsn列间增加n磅else if(name.startsWith("a")latex += getALatex(name, text);else if(name.startsWith("b")latex+=getBLatex(name, text);/平移else if(name.startsWith("d")latex+=getDLatex(n

20、ame, text);/积分else if(name.startsWith("i")latex+=getILatex(name, text);/列表else if(name.startsWith("l")latex+=getLLatex(name, text);/重叠else if(name.startsWith("o")latex+=getOLatex(name, text);/框else if(name.startsWith("x")latex+=getXLatex(name, text);/空白else if

21、(StringUtils.isBlank(name)latex+=parserElements(text);elseSystem.err.println("error parserToLatex");return latex;/* 根式多次根式* param name name* param text text* return latex* /private static String getRLatex(String name,String text)String latex =""List<String> args = getArgs(t

22、ext); if(args.size()=2)latex +=" sqrt”;latex +=parserElements(args.get(0);latex +=""latex +=parserElements(args.get(1);latex +=""else if(args.size()=1)latex +=" sqrt"latex +=parserElements(args.get(0);latex +=""elseSystem.err.println("error getRLatex

23、"); return latex;/*分式param name nameparam text textreturn latex*/private static String getFLatex(String name,String text)String latex =""List<String> args = getArgs(text);if(args.size()=2)latex +=" frac"latex +=parserElements(args.get(0);latex +=""latex +=par

24、serElements(args.get(1);latex +=""elseSystem.err.println("error getFLatex");return latex;/* /a矩阵al左对齐;ac居中;ar右对齐;con元素排成 n歹U; vsn行间增加n 磅;hsn列间增加n磅param name nameparam text textreturn latex*/private static String getALatex(String name,String text)String latex =""List<

25、String> args =getArgs(text);String n = name.replaceAll("ASs*co(0-9*)Ss*$”, "$1");/列数int col = 1;if(n.matches("0-9*") col = Integer.valueOf(n);for(int i = 0 ;i<args.size();i+)if(i!=0&&i%col=0)latex += " "latex += parserElements(args.get(i);return latex

26、;/*左括号使用字符;右括号使用字符 ;左右括号都使用字符bc* param name name* param text text* return latex*/private static String getBLatex(String name,String text)String latex =""String lc=""String rc=""String bc=""List<String> args =getArgs(text);name = name.replace(LEFTBRACKETR

27、E,LEFTBRACKET).replace(RIGHTBRACKETRE, RIGHTBRACKET);if(name.contains("lc")|name.contains("rc")lc = name.replaceAll("AsS*lc(sS)sS*$", "$1");rc = name.replaceAll("AsS*rc(sS)sS*$”, "$1");lc = StringUtils.isBlank(lc)|lc.equals(name)?".":l

28、c;rc = StringUtils.isBlank(rc)|rc.equals(name)?".":rc;/方程组处理if(lc.equals("")&&rc.equals(".")latex+="begincases”;for(int i = 0 ;i<args.size();i+)if(i!=0)latex += " "latex += parserElements(args.get(i);latex+="endcases”;/矩阵处理elselc = lc.repl

29、ace("", "");rc = rc.replace("", "");latex+="left"+lc+"beginmatrix"for(int i = 0 ;i<args.size();i+)if(i!=0)latex += " "latex += parserElements(args.get(i);latex+="endmatrixright"+rc;else if(name.contains("bc"

30、)bc = name.replaceAll("sS*bc(sS)sS*$”, "$1");latex+="left"+bc+"beginmatrix"for(int i = 0 ;i<args.size();i+)if(i!=0)latex += " "latex += parserElements(args.get(i);latex+="endmatrixright"+bc;elseSystem.err.println("error getBLatex");

31、return latex;/*平移fon右边n磅;ban左边n磅;li为下一个字符前的空白添加下划线param name nameparam text textreturn latex*/private static String getDLatex(String name,String text) /TODOreturn parserElements(text);/*积分EQi (a , b, 3x+1 dx)* su生成求和公式pr生成求积公式in积分限不在符号的上下,而在符号之右* fcc将符号c设置为固定高度的字符vcc符号高度与第三个元素高度一致param name namepara

32、m text textreturn latex*/private static String getILatex(String name,String text) String latex =""String sym =""List<String> args = getArgs(text);if(args.size()=3)/默认上下标在符号上下/汇if(name.contains("su")sym = "sum"/上下标在右侧if(name.contains("in")sym+=&

33、quot;nolimits"/ nelse if(name.contains("pr")sym = "prod"if(name.contains("in")sym+="nolimits"/八ntelse if(name.contains("fc")sym = name.replace("sS重叠开关o():将每个后续元素置于前一个元素之上fc(F (*)sS*$”, "$1"); elsesym = "int"latex+=sym+&q

34、uot;_"+parserElements(args.get(0)+"A"+parserElements(args.get(1)+" "+parserElements(args.get(2);elseSystem.err.println("error getILatex");return latex;/可用参数: 左对齐al ;居中ac ;右对齐ar* param name* param text* return*/private static String getOLatex(String name,String text

35、)* l():使用任意个数的元素组成列表。* param name name* param text text* return latex* /*private static String getLLatex(String name,String text) return parserElements(text);String latex =""List<String> args = getArgs(text);if(args.size()>=2)String a1=parserElements(args.get(0);String a2=parserEl

36、ements(args.get(1);if(!a1.startsWith("(arg1,arg2,.) 返回参数列表)a1="A"+a1+"”;if(!a2.startsWith("_") a2="_"+a2+"”;latex+="!,"+a1+a2;for(int i=2;i<args.size();i+)latex+=parserElements(args.get(i);return latex;return parserElements(text);/* x():创建元素边

37、框。 * 可用参数:在元素的上面绘制一个边框to ;在元素的下面绘制一个边框 bo ;在元素的左面绘制一个边框le在元素的右面绘制一个边框ri* param name* param text* return latex*/private static String getXLatex(String name,String text)/fboxString latex = parserElements(text);boolean le =false;boolean ri =false;int i =0 ;if(name.contains("le")i+;le=true;if(

38、name.contains("ri")i+;ri = true;if(le|ri)latex = "left"+(le?T:".")+latex+"right”+(ri?T:".");if(name.contains("to")i+;latex = "overline"+latex+"”;if(name.contains("bo")i+;latex = "underline"+latex+"”;if(i=0

39、|i=4)latex ="fbox"+parserElements(text)+""return latex;/* param eqformula return*/private static List<String> getArgs(String eqformula)upn文字上移由n指定的磅数(默认值为2磅)din在段落一行之下添加由n指定的磅数的空白don将单个元素相对相邻文字下移由n指定的磅数。默认值为param name nameparam text textreturn latex*/private static String g

40、etSLatex(String name,String text)String latex =""List<String> args = getArgs(text);if(args.size()>=1&&StringUtils.isNotBlank(args.get(0) latex+="A"latex +=parserElements(args.get(0);latex +=""if(args.size()=2&&StringUtils.isNotBlank(args.get(1)

41、latex+="_"latex +=parserElements(args.get(1);latex +=""/*return latex;List<String> args=new ArrayList<String>();int leftbra = 0;int rightbra = 0;List<Integer> index = new ArrayList<Integer>();for(int i = 0 ; i<eqformula.length();i+)char c = eqformula.ch

42、arAt(i);if(c = ')') leftbra +;if(c ='(') rightbra+;if(leftbra=rightbra&&c=',') index.add(i);if(CollectionUtils.isEmpty(index)args.add(eqformula);elseint begin = 0 ;for(int i=0;i<index.size();i+) args.add(eqformula.substring(begin, index.get(i); begin = index.get(i)+1;args.add(eqformula.substring(begin,eqformula.length();return args;

展开阅读全文
相关资源
猜你喜欢
相关搜索

当前位置:首页 > 社会民生


经营许可证编号:宁ICP备18001539号-1