bib2xhtml.pl
Go to the documentation of this file.
1 #
2 # Below is a stripped down version of bib2xhtml used by doxygen.
3 # For the full version see http://www.spinellis.gr/sw/textproc/bib2xhtml/
4 #
5 # Convert from bibtex to XHTML.
6 #
7 # (C) Copyright 1995, 1996 David Hull.
8 # (David Hull / hull@cs.uiuc.edu / http://www.uiuc.edu/ph/www/dlhull)
9 #
10 # (C) Copyright 2002-2010 Diomidis Spinellis
11 # http://www.spinellis.gr
12 #
13 # This program is free software. You can redistribute it and/or modify
14 # it under the terms of the GNU General Public License. See the
15 # files README and COPYING for details.
16 #
17 # This source code contains UTF-8 characters. You might want to use
18 # an appropriate editor, if you want to view/modify the LaTeX to Unicode
19 # substitution commands.
20 #
21 
22 use Getopt::Std;
23 use open IO => ':crlf';
# Label-style tables.  Only the "numbered" style is registered here: its
# numeric id indexes the tag text emitted (inside "<" and ">") around the
# generated bibliography list.
$LABEL_NUMBERED = 2;
$label_styles{'numbered'} = $LABEL_NUMBERED;
$list_start[$LABEL_NUMBERED] = 'dl class="citelist"';
$list_end[$LABEL_NUMBERED] = "/dl";

# Names of the temporary files to delete once the run has finished.
@tmpfiles = ();
#
# html_ent - translate LaTeX accents, special characters, Greek letters and
# math symbols found in $_ into HTML entities or literal UTF-8 characters.
#
# Takes no arguments; the text is read from and written back to the global
# $_.  The return value is not meaningful.
#
# The braced forms expect every brace to have been tagged by the caller as
# "\001<id>{" ... "\001<id>}", so that the backreference \1 pairs an opening
# brace with its own closing brace.  Braced forms are tried before the bare
# forms, and "---" before "--", so the order of the statements matters.
#
sub html_ent {
    # Dotless i: accents are applied to a plain "i" below.
    s/\\i\b/i/g;

    # Accented letters that have a named HTML entity.
    s/\\\'(\001\d+)\{([AEIOUaeiou])\1\}/&$2acute;/gs;
    s/\\\'([AEIOUaeiou])/&$1acute;/g;
    s/\\\`(\001\d+)\{([AEIOUaeiou])\1\}/&$2grave;/gs;
    s/\\\`([AEIOUaeiou])/&$1grave;/g;
    s/\\\"(\001\d+)\{([AEIOUaeiouy])\1\}/&$2uml;/gs;
    s/\\\"([AEIOUaeiouy])/&$1uml;/g;
    s/\\\~(\001\d+)\{([ANOano])\1\}/&$2tilde;/gs;
    s/\\\~([ANOano])/&$1tilde;/g;
    s/\\\^(\001\d+)\{([AEIOUaeiou])\1\}/&$2circ;/gs;
    s/\\\^([AEIOUaeiou])/&$1circ;/g;
    s/\\c(\001\d+)\{([Cc])\1\}/&$2cedil;/gs;

    # Accents with no entity used here: keep only the base character.
    s/\\u(\001\d+)\{(.)\1\}/$2/gs;
    s/\\v(\001\d+)\{(.)\1\}/$2/gs;
    s/\\([lL])\b/$1/g;
    s/\\\=(\001\d+)\{(.)\1\}/$2/gs;
    s/\\\=(.)/$1/g;
    s/\\\.(\001\d+)\{(.)\1\}/$2/gs;
    s/\\\.(.)/$1/g;

    # Special letters and symbols; whitespace that terminated the control
    # word is swallowed together with it.
    s/\\([Oo])\b\s*/&$1slash;/g;
    s/\\AA\b\s*/Å/g;
    s/\\aa\b\s*/å/g;
    s/\\AE\b\s*/Æ/g;
    s/\\ae\b\s*/æ/g;
    s/\\ss\b\s*/ß/g;
    s/\\S\b\s*/§/g;
    s/\\P\b\s*/¶/g;
    s/\\pounds\b\s*/£/g;

    # Ligature-style punctuation.
    s/\?\`/¿/g;
    s/\!\`/¡/g;
    s/\-\-\-/—/g;
    # Leave "!--" and "-->" alone so HTML comment delimiters survive.
    s/([^\!])\-\-([^>])/$1–$2/g;

    # Greek letters: the LaTeX name doubles as the HTML entity name.
    s/\\([aA]lpha)\b/&$1;/g;
    s/\\([bB]eta)\b/&$1;/g;
    s/\\([gG]amma)\b/&$1;/g;
    s/\\([dD]elta)\b/&$1;/g;
    s/\\varepsilon\b/ε/g;
    s/\\([eE]psilon)\b/&$1;/g;
    s/\\([zZ]eta)\b/&$1;/g;
    s/\\([eE]ta)\b/&$1;/g;
    s/\\([tT]heta)\b/&$1;/g;
    s/\\vartheta\b/θ/g;
    s/\\([iI]ota)\b/&$1;/g;
    s/\\([kK]appa)\b/&$1;/g;
    s/\\([lL]ambda)\b/&$1;/g;
    s/\\([mM]u)\b/&$1;/g;
    s/\\([nN]u)\b/&$1;/g;
    s/\\([xX]i)\b/&$1;/g;
    s/\\([oO]micron)\b/&$1;/g;
    s/\\([pP]i)\b/&$1;/g;
    s/\\varpi\b/π/g;
    s/\\([rR]ho)\b/&$1;/g;
    s/\\varrho\b/ρ/g;
    s/\\([sS]igma)\b/&$1;/g;
    s/\\varsigma\b/ς/g;
    s/\\([tT]au)\b/&$1;/g;
    s/\\([uU]psilon)\b/&$1;/g;
    s/\\([pP]hi)\b/&$1;/g;
    s/\\varphi\b/φ/g;
    s/\\([cC]hi)\b/&$1;/g;
    s/\\([pP]si)\b/&$1;/g;
    s/\\([oO]mega)\b/&$1;/g;

    # Math symbols.
    # "^\circ" is the degree sign.  The caret has to be escaped: an unescaped
    # "^" is a start-of-string anchor and would never match the superscript.
    s/\^\\circ\b/°/g;
    s/\\infty\b/∞/g;
    s/\\emptyset\b/∅/g;
    s/\\pm\b/±/g;
    s/\\times\b/×/g;
    s/\\cdot\b/⋅/g;
    s/\\partial\b/∂/g;
    s/\\nabla\b/∇/g;
    s/\\surd\b/√/g;
    s/\\perp\b/⊥/g;
    s/\\sum\b/∑/g;
    s/\\int\b/∫/g;
    s/\\prod\b/∏/g;
    s/\\sim\b/∼/g;
    s/\\approx\b/≈/g;
    s/\\ne\b/≠/g;
    s/\\equiv\b/≡/g;
    s/\\propto\b/∝/g;
    s/\\le\b/≤/g;
    s/\\ge\b/≥/g;
    s/\\leftarrow\b/←/g;
    s/\\rightarrow\b/→/g;
    s/\\in\b/∈/g;
    s/\\notin\b/∉/g;
    s/\\lceil\b/⌈/g;
    s/\\rceil\b/⌉/g;
    s/\\lfloor\b/⌊/g;
    s/\\rfloor\b/⌋/g;
}
# Sort the command-line arguments: names ending in ".bib" are BibTeX
# databases (kept without the extension), anything else is taken as the HTML
# file to update.  If several non-.bib names are given the last one wins.
for my $arg (@ARGV) {
    if ($arg =~ /\.bib$/) {
        ($bibfile = $arg) =~ s/\.bib$//;
        push(@bibfiles, $bibfile);
        next;
    }
    $htmlfile = $arg;
}

# Without an HTML file there is nothing to do.
if (!defined($htmlfile)) {
    exit(1);
}

# The \bibdata line written to the .aux file, naming every database.
$bibdatacmd = join('', "\\bibdata{", join(',', @bibfiles), "}");

# Fixed settings of this stripped-down version.
$label_style = $LABEL_NUMBERED;
$bstfile = "doxygen";
# Open the existing HTML page (OHTMLFILE) and the temporary file that will
# receive the updated page (HTMLFILE, named "<htmlfile><pid>").  $mode keeps
# the permission bits of the existing page so they can be reapplied later.
#
# The input is opened first, so a missing page no longer leaves an empty
# temporary file behind.  The three-argument form of open() keeps characters
# such as ">" or "|" in a file name from being read as an open mode.
umask(077);
unless (open(OHTMLFILE, '<', $htmlfile)) {
    print "Error opening $htmlfile\n";
    exit(1);
}
$mode = (stat OHTMLFILE)[2] & 0xfff;

{
    my $newhtml = $htmlfile . $$;
    unless (open(HTMLFILE, '>', $newhtml)) {
        print "Error opening $newhtml\n";
        close(OHTMLFILE);
        exit(1);
    }
}
# Markers that delimit the citation list inside the HTML page.
$beginstring = "<!-- BEGIN CITATIONS -->";
$endstring = "<!-- END CITATIONS -->";
@citations = ();

# Copy the page up to and including the begin marker.
PREAMBLE:
while (<OHTMLFILE>) {
    print HTMLFILE;
    if (m/^$beginstring$/) {
        last PREAMBLE;
    }
}

# Copy the citation section as well, remembering from each line the text that
# ends at its last "}" (presumably the \citation{...} commands written by
# doxygen -- verify against the generator).
CITATION:
while (<OHTMLFILE>) {
    print HTMLFILE;
    if (m/^$endstring$/) {
        last CITATION;
    }
    if (m/^([^\\]*)?(.+\})(.*)?$/) {
        push(@citations, $2);
    }
}

# The \bibdata command goes after the collected citations.
push(@citations, $bibdatacmd);
# Write a BibTeX .aux file ("bib<pid>.aux") containing the bibliography style
# and the collected citation lines, then run bibtex on it.  The .aux, .blg
# and .bbl names are registered in @tmpfiles for removal at the end.
$auxfile = "bib$$";
push(@tmpfiles, "$auxfile.aux");

# A failure to create or flush the .aux file is reported here; otherwise it
# would only surface later as a bibtex failure with no visible cause.
unless (open(AUXFILE, '>', "$auxfile.aux")) {
    print STDERR "error opening $auxfile.aux: $!\n";
    exit(1);
}
print AUXFILE "\\relax\n\\bibstyle{$bstfile}\n";
print AUXFILE map { "$_\n" } @citations;
unless (close(AUXFILE)) {
    print STDERR "error writing $auxfile.aux: $!\n";
    exit(1);
}

push(@tmpfiles, "$auxfile.blg");
push(@tmpfiles, "$auxfile.bbl");

# bibtex's own output is captured and discarded; $? is -1 only when the
# command could not be started at all.
`bibtex $auxfile 2>&1`;
if ($?==-1)
{
    print "bibtex command failed: $!\n";
}
# Markers that delimit the generated bibliography inside the HTML page.
$beginstring = "<!-- BEGIN BIBLIOGRAPHY -->";
$endstring = "<!-- END BIBLIOGRAPHY -->";

# Copy the page up to, but not including, the begin marker.
BEFORE_BIB:
while (<OHTMLFILE>) {
    if (m/^$beginstring$/) {
        last BEFORE_BIB;
    }
    print HTMLFILE;
}

# Discard the previous bibliography, end marker included.
OLD_BIB:
while (<OHTMLFILE>) {
    if (m/^$endstring$/) {
        last OLD_BIB;
    }
}

# Start the fresh bibliography section.
print HTMLFILE "$beginstring\n";
# First pass over the .bbl file produced by bibtex: number the entries and
# record, for every cite key, the label that references to it should show.
# Afterwards the opening list tag is written and the .bbl file is reopened
# for the second (conversion) pass.

# Open $t for reading as BBLFILE.  On failure the partially written HTML
# temporary is removed before dying; the bibtex files are left in place
# because the .blg log is the only record of what bibtex reported.
sub open_bbl {
    return if open(BBLFILE, '<', $t);
    my $err = "$!";
    close(HTMLFILE);
    unlink("$htmlfile$$");
    die "error opening $t: $err\n";
}

$t = $auxfile . ".bbl";
# Paragraph mode: every read returns one blank-line separated block.  This
# stays in effect for the rest of the script.
$/ = "";
open_bbl();
$nentry = 0;
HEADER:
while (<BBLFILE>) {
    # Blocks starting with "#" before the first entry are header comments;
    # one of them may name the label style.
    if (($nentry == 0) && (m/^#/)) {
        if ((m/#\s*label-style:\s*(\S+)/) && (! defined $label_style)) {
            $label_style = $label_styles{$1};
            if (! defined $label_style) {
                # $1 still holds the style name from the match above.
                print STDERR "label style unknown: $1\n";
            }
        }
        next HEADER;
    }
    $nentry++;
    # The label shown for an entry is its position in the list, not the
    # label text bibtex wrote between the square brackets.
    ($bcite) = m+<dt><a name=\"([^\"]*)\">\[([^\]]*)\]</a></dt><dd>+;
    $bibcite{$bcite} = "$nentry" if defined $bcite;
}
close(BBLFILE);

# $LABEL_DEFAULT is not defined in this stripped-down script; fall back to
# the numbered style rather than emitting an empty list tag.
if (! defined $label_style) {
    $label_style = defined($LABEL_DEFAULT) ? $LABEL_DEFAULT : $LABEL_NUMBERED;
}
$list_start = $list_start[$label_style];
$list_end = $list_end[$label_style];
print HTMLFILE "<$list_start>\n\n";

open_bbl();
$nentry = 0;
# Second pass over the .bbl file: convert every entry from the LaTeX-flavoured
# markup written by bibtex to XHTML and append it to HTMLFILE, then close the
# list and write the end marker.  The substitutions depend on their order.

# Convert the markup that can appear in math mode to HTML: "^\circ" becomes a
# degree sign, "^" and "_" become <sup> and <sub>.  Takes the math text (with
# braces tagged as \001<id>{ ... \001<id>}) and returns the converted text.
sub domath {
    my ($math) = @_;
    $math =~ s/\^(\001\d+)\{\\circ\1\}/\&\#176;/gs;
    $math =~ s/\^\\circ/\&\#176;/g;
    $math =~ s/\^(\001\d+)\{(.*)\1\}/<sup>$2<\/sup>/gs;
    $math =~ s/\^(\w)/<sup>$1<\/sup>/g;
    $math =~ s/\_(\001\d+)\{(.*)\1\}/<sub>$2<\/sub>/gs;
    $math =~ s/\_(\w)/<sub>$1<\/sub>/g;
    return $math;
}

ENTRY:
while (<BBLFILE>) {
    # Skip the "#" header blocks that precede the first entry.
    next ENTRY if (($nentry == 0) && (m/^#/));
    $nentry++;

    # Hide escaped braces and dollars behind control characters so that they
    # are not mistaken for grouping or math delimiters; restored at the end.
    s/\\\{/\002/g;
    s/\\\}/\003/g;
    s/\\\$/\004/g;

    # Tag every brace with "\001<id>", giving a closing brace the id of the
    # opening brace it matches.  Later patterns use a backreference to that
    # tag to find the end of a group.  The id stack starts empty for every
    # entry, so unbalanced braces in one entry cannot affect the next.
    {
        my ($next_id, $depth, @open_ids) = (0, 0);
        s/([\{\}])/join("","\001",($1 eq "\{" ? $open_ids[$depth++]=$next_id++ : $open_ids[--$depth]),$1)/ge;
    }

    # Join lines that end in a TeX comment character.
    s/\%\n//g;
    # Drop a period that directly follows ".</cite>", ".</a>" or ".'".
    s/(\.(<\/cite>|<\/a>|\')+)\./$1/g;
    # Replace the label bibtex wrote with the running entry number.
    s:(<dt><a name=\"[^\"]*\">\[)[^\]]*(\]</a></dt><dd>):$1$nentry$2:;

    # Resolve \cite{key} and \citelabel{key}: unknown keys show the key
    # itself, \citelabel becomes a link to the entry, \cite a plain label.
    while (m/(\\(cite(label)?)(\001\d+)\{([^\001]+)\4\})/) {
        $old = $1;
        $doxref = defined($3);
        $bcite = $5;
        if (! defined $bibcite{$bcite}) {
            $blabel = " [" . $bcite . "]";
        } elsif ($doxref) {
            $blabel = " <a href=\"#$bcite\">[" . $bibcite{$bcite} . "]<\/a>";
        } else {
            $blabel = " [" . $bibcite{$bcite} . "]";
        }
        # Escape the matched command so it can be used as a literal pattern.
        $old =~ s/(\W)/\\$1/g;
        s/\s*$old/$blabel/g;
    }
    # "In <a ...>TEXT [TEXT" becomes "In <a ...>[TEXT".
    s/In (<a href=\"[^\"]*\">)([^\[]+) \[(\2)/In $1\[$2/;
    s/\\htmladdnormallink(foot)?(\001\d+)\{([^\001]+)\2\}(\001\d+)\{([^\001]+)\4\}/<a href="$5">$3<\/a>/gs;

    # Escape "&" (and "\&") as "&amp;" without touching an existing "&amp;".
    s/\&amp;/\005/g;
    s/\\?&/&amp;/g;
    s/\005/&amp;/g;

    # Accents, special characters, Greek letters and math symbols.
    html_ent();

    # \char'<octal>, \char"<hex> and \char<number> become numeric references.
    while (m/\\char([\'\"]?[0-9a-fA-F]+)/) {
        $o = $r = $1;
        if ($r =~ s/^\'//) {
            $r = oct($r);
        } elsif ($r =~ s/^\"//) {
            $r = hex($r);
        }
        s/\\char$o\s*/&#$r;/g;
    }

    # NOTE(review): this expects a bare "}" right after the character, but the
    # tagging step above prefixes every brace with \001<id>, so it looks like
    # this never matches; the generic {\cmd{x}} rule further down appears to
    # handle \etalchar instead -- confirm.
    s/\{\\etalchar\001(\d+)\{(.)\}\001\1\}/$2/g;
    s/\\par\b/<p \/>/g;

    # Links.
    s/\\url(\001\d+)\{(.*)\1\}/<a href="$2">$2<\/a>/gs;
    s/\\href(\001\d+)\{(.*)\1\}(\001\d+)\{([^\001]*)\3\}/<a href="$2">$4<\/a>/gs;
    s/\\href(\001\d+)\{(.*)\1\}/<a href="$2">$2<\/a>/gs;

    # Font changes, old style ({\em ...}) and new style (\emph{...}).
    s/(\001\d+)\{\\rm\s+(.*)\1\}/$2/gs;
    s/\\textrm(\001\d+)\{(.*)\1\}/$2/gs;
    s/(\001\d+)\{\\em\s+(.*)\1\}/<em>$2<\/em>/gs;
    s/(\001\d+)\{\\it\s+(.*)\1\}/<i>$2<\/i>/gs;
    s/(\001\d+)\{\\bf\s+(.*)\1\}/<b>$2<\/b>/gs;
    s/(\001\d+)\{\\tt\s+(.*)\1\}/<tt>$2<\/tt>/gs;
    s/\\emph(\001\d+)\{(.*)\1\}/<em>$2<\/em>/gs;
    s/\\textit(\001\d+)\{(.*)\1\}/<i>$2<\/i>/gs;
    s/\\textbf(\001\d+)\{(.*)\1\}/<b>$2<\/b>/gs;
    s/\\texttt(\001\d+)\{(.*)\1\}/<tt>$2<\/tt>/gs;
    s/\\mathrm(\001\d+)\{(.*)\1\}/$2/gs;
    s/\\mathnormal(\001\d+)\{(.*)\1\}/$2/gs;
    s/\\mathsf(\001\d+)\{(.*)\1\}/$2/gs;
    s/\\mathbf(\001\d+)\{(.*)\1\}/<b>$2<\/b>/gs;
    s/\\mathcal(\001\d+)\{(.*)\1\}/<i>$2<\/i>/gs;
    s/\\mathit(\001\d+)\{(.*)\1\}/<i>$2<\/i>/gs;
    s/\\mathtt(\001\d+)\{(.*)\1\}/<tt>$2<\/tt>/gs;
    s/\\bibxhtmlname(\001\d+)\{(.*)\1\}/$2/ges;

    # Math mode, $...$ and \(...\); the delimiters themselves are dropped.
    s/(\$([^\$]+)\$)/domath($2)/ge;
    s/(\\\((([^\\]|\\[^\(\)])+)\\\))/domath($2)/ge;
    s/\\mbox(\001\d+)\{(.*)\1\}/$2/gs;

    # "~" is a non-breaking space, except inside an href value, where it is
    # parked as \005 and put back afterwards.
    while (s/(<a href\=\"[^"]*?)\~/$1\005/g) { ; }
    s/([^\\])~/$1&nbsp;/g;
    s/\\\,/&thinsp;/g;
    s/\\ldots\b/&hellip;/g;
    s/\\dots\b/&hellip;/g;
    s/\005/\~/g;
    s/\\ / /g;
    s/\\textasciitilde\b\s*/~/g;

    # Unescape the remaining escaped specials, then drop whatever backslash
    # sequences are left: unknown commands keep only their name or argument.
    s/\\([\#\&\%\~\_\^\|])/$1/g;
    s/\\\W//g;
    s/\001(\d+)\{\\[A-Za-z]+\001(\d+)\{([^\001]*)\001\2\}\001\1\}/$3/g;
    s/\\([A-Za-z]+)/ $1 /g;
    # Remove a link that ended up with no text.
    s+In <a href=\"[^\"]*\"></a>++;

    # Remove the tagged grouping braces and restore the escaped characters.
    s/\001\d+[\{\}]//gs;
    tr/\002\003\004/{}$/;
    print HTMLFILE $_;
}
close(BBLFILE);
print HTMLFILE "<$list_end>\n\n$endstring\n";
# Copy the rest of the original page, then replace the original with the
# temporary file and remove the bibtex work files.
while (<OHTMLFILE>) {
    print HTMLFILE;
}
close (OHTMLFILE);

{
    my $newhtml = $htmlfile . $$;

    # Buffered write errors (a full disk, for instance) surface at close.
    # The original page must not be replaced by a truncated copy.
    unless (close(HTMLFILE)) {
        print STDERR "error writing $newhtml: $!\n";
        unlink($newhtml, @tmpfiles);
        exit(1);
    }

    # Give the new page the permission bits of the page it replaces.
    chmod($mode, $newhtml);

    unless (rename($newhtml, $htmlfile)) {
        print STDERR "error renaming $newhtml to $htmlfile: $!\n";
        unlink($newhtml, @tmpfiles);
        exit(1);
    }
}

unlink(@tmpfiles);
exit(0);