Skip to content

Commit f53c28f

Browse files
authored
Merge pull request #32 from source-academy/xml2latex
Xml2latex
2 parents 23fccbf + 3cf1dbd commit f53c28f

File tree

109 files changed

+3574
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

109 files changed

+3574
-0
lines changed

.babelrc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"presets": ["es2015", "stage-2"],
3+
"plugins": []
4+
}

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# dependencies
2+
/node_modules
3+
14
## emacs
25
*~
36

@@ -13,5 +16,6 @@ rails/out
1316

1417
# generated files #
1518
###################
19+
/latex
1620
rails-html
1721
sicp.zip

README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,17 @@ Static site generation is handled by `rails/Rakefile`. It outputs the html pages
5858
## Bugs
5959
* Google Custom Search doesn't seem to be working.
6060
* Snippets containing '\n' are rendered with an actual newline instead of the characters '\n' on the `Source Academy` platform. This problem is difficult to eliminate because the code encoder cannot tell the two apart when reading the content.
61+
62+
## XML2Latex
63+
64+
## Requirements
65+
For development & deployment:
66+
* node.js
67+
68+
## Set up
69+
Run `npm install` to install dependencies.
70+
71+
## Generating Latex Files
72+
Run `npm start`.
73+
The LaTeX files will be written to the `latex` folder.
74+
Compile `main.tex` with XeLaTeX + MakeIndex + BibTeX to produce the PDF version.

nodejs/index.js

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
const fs = require('fs');
2+
const path = require('path');
3+
4+
const xpath = require('xpath');
5+
const dom = require('xmldom').DOMParser;
6+
7+
import parseXML from './parseXML.js';
8+
9+
// Root folders, both resolved relative to this script's own directory:
// inputDir is where the XML sources are read from, outputDir is where the
// generated LaTeX files (including main.tex) are written.
const inputDir = path.join(__dirname, '../xml');
10+
const outputDir = path.join(__dirname, '../latex');
11+
12+
// LaTeX preamble that createMainLatex writes at the top of main.tex: document
// class, package list, graphics path, a patch for \lstinline, the JavaScript
// language definition and defaults for the listings package, the \lt and \gt
// helper commands, epigraph lengths, \makeindex and the opening
// \begin{document}. Backslashes are doubled because this is a JavaScript
// template literal.
const preamble = `\\documentclass{report}
13+
14+
\\usepackage{amsmath}
15+
\\usepackage{amssymb}
16+
\\usepackage{cprotect}
17+
\\usepackage{csquotes}
18+
\\usepackage{epigraph}
19+
\\usepackage{etoolbox}
20+
\\usepackage{graphicx}
21+
\\usepackage{listings}
22+
\\usepackage{makeidx}
23+
\\usepackage{subcaption}
24+
\\usepackage{underscore}
25+
26+
\\graphicspath{ {../rails/public/chapters/} }
27+
28+
\\expandafter\\patchcmd\\csname \\string\\lstinline\\endcsname{%
29+
\\leavevmode
30+
\\bgroup
31+
}{%
32+
\\leavevmode
33+
\\ifmmode\\hbox\\fi
34+
\\bgroup
35+
}{}{%
36+
\\typeout{Patching of \\string\\lstinline\\space failed!}%
37+
}
38+
39+
\\lstdefinelanguage{JavaScript}{
40+
keywords={const, break, case, catch, continue, debugger, default, delete, do, else, finally, for, function, if, in, instanceof, new, return, switch, this, throw, try, typeof, var, void, while, with},
41+
morecomment=[l]{//},
42+
morecomment=[s]{/*}{*/},
43+
morestring=[b]',
44+
morestring=[b]",
45+
sensitive=true
46+
}
47+
48+
\\lstset{
49+
language=JavaScript,
50+
basicstyle=\\ttfamily,
51+
showstringspaces=false,
52+
showspaces=false,
53+
escapechar={^}
54+
}
55+
56+
\\newcommand{\\lt}{\\symbol{"3C}}% Less than
57+
\\newcommand{\\gt}{\\symbol{"3E}}% Greater than
58+
59+
\\setlength\\epigraphwidth{11cm}
60+
\\setlength\\epigraphrule{0pt}
61+
62+
\\makeindex
63+
64+
\\begin{document}\n`;
65+
66+
// Closing part of main.tex, written by createMainLatex after the chapter
// \input lines: prints the index and ends the document.
const ending = `\n\\printindex\n
67+
\\end{document}`;
68+
69+
/**
 * Makes sure the directory `path` exists, creating it (and any missing parent
 * directories) when necessary, and reports the outcome Node-callback style.
 *
 * @param {string} path - Directory to create. Inside this function the
 *   parameter shadows the module-level `path` import.
 * @param {function(?Error): void} cb - Called with `null` when the directory
 *   exists afterwards, otherwise with the error reported by `fs.mkdir`.
 */
const ensureDirectoryExists = (path, cb) => {
  // `recursive: true` also creates missing parents. Conversions are started
  // asynchronously, so a nested output folder can be requested before its
  // parent has been created; a plain mkdir would fail with ENOENT then.
  fs.mkdir(path, { recursive: true }, (err) => {
    // A folder that already exists is not a failure for our purposes.
    if (err && err.code !== 'EEXIST') {
      cb(err);
      return;
    }
    cb(null);
  });
};
77+
78+
/**
 * Converts one XML source file into a LaTeX file.
 *
 * Reads `<inputDir>/<filepath>/<filename>`, parses it as XML, lets `parseXML`
 * fill an array of output fragments, and writes the joined fragments to
 * `<outputDir>/<filepath>/<filename without .xml>.tex`. The work is
 * asynchronous; any error is logged with `console.log` and the conversion of
 * this file is abandoned.
 *
 * @param {string} filepath - Directory of the file, relative to `inputDir`.
 * @param {string} filename - Name of the XML file inside that directory.
 */
const xmlToLatex = (filepath, filename) => {
  const fullFilepath = path.join(inputDir, filepath, filename);

  // Read by path instead of fs.open() + fs.readFile(fd): a descriptor passed
  // to fs.readFile is not closed automatically, so the previous approach
  // leaked one file descriptor per converted file.
  fs.readFile(fullFilepath, { encoding: 'utf-8' }, (readErr, data) => {
    if (readErr) {
      console.log(readErr);
      return;
    }

    const doc = new dom().parseFromString(data);
    const writeTo = [];
    parseXML(doc.documentElement, writeTo);

    ensureDirectoryExists(path.join(outputDir, filepath), (dirErr) => {
      if (dirErr) {
        console.log(dirErr);
        return;
      }

      const outputFile = path.join(outputDir, filepath, filename.replace(/\.xml$/, '') + '.tex');
      // fs.writeFile reports failures through its callback; the write stream
      // used before had no 'error' listener, so a failed write surfaced as an
      // unhandled 'error' event.
      fs.writeFile(outputFile, writeTo.join(""), (writeErr) => {
        if (writeErr) {
          console.log(writeErr);
        }
      });
    });
  });
};
109+
110+
/**
 * Walks `<inputDir>/<filepath>` and converts every `.xml` file found there,
 * descending into sub-directories.
 *
 * @param {string} filepath - Directory to scan, relative to `inputDir`
 *   (the empty string scans `inputDir` itself).
 */
const recursiveXmlToLatex = (filepath) => {
  const fullPath = path.join(inputDir, filepath);
  fs.readdir(fullPath, (err, files) => {
    // Without this guard `files` is undefined on failure and forEach throws.
    if (err) {
      console.log(err);
      return;
    }
    files.forEach((file) => {
      if (file.match(/\.xml$/)) {
        xmlToLatex(filepath, file);
      } else if (fs.lstatSync(path.join(fullPath, file)).isDirectory()) {
        recursiveXmlToLatex(path.join(filepath, file));
      }
    });
  });
};
124+
125+
/**
 * Writes `<outputDir>/main.tex`: the preamble, one `\input` line for every
 * entry of `inputDir` whose name contains "chapter", and the closing part.
 * The output directory is created first when it does not exist yet.
 */
const createMainLatex = () => {
  const chaptersFound = fs.readdirSync(inputDir).filter((entry) => entry.match(/chapter/));

  if (!fs.existsSync(outputDir)) {
    fs.mkdirSync(outputDir);
  }

  const mainFile = fs.createWriteStream(path.join(outputDir, "main.tex"));
  mainFile.once('open', () => {
    mainFile.write(preamble);
    for (const chapter of chaptersFound) {
      // Each chapter is expected at <chapter>/<chapter>.tex next to main.tex.
      const pathStr = chapter + "/" + chapter + ".tex";
      mainFile.write("\\input{" + pathStr + "}\n");
    }
    mainFile.write(ending);
    mainFile.end();
  });
};
147+
148+
/**
 * Entry point: writes main.tex, then starts the conversion of every XML file
 * below the input directory.
 */
function main() {
  createMainLatex();
  recursiveXmlToLatex('');
}
152+
153+
main();

nodejs/parseText.js

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
import replaceTagWithSymbol from './replaceTagWithSymbol';
2+
import processFigure from './processFigure';
3+
4+
// tagsToRemove: node names that processText reports as handled without
// emitting anything, so the node and its children are dropped from the output.
// NOTE(review): "SCHEMEINLINE" also has a handler in processTextFunctions,
// which processText consults first, so that entry looks unreachable from
// processText - confirm which behaviour is intended.
// ignoreTags: node names whose own markup is skipped but whose children are
// still processed by processText.
const tagsToRemove = new Set(["#comment", "COMMENT", "CHANGE", "EXCLUDE", "HISTORY", "SCHEME", "SCHEMEINLINE", "EXERCISE", "SOLUTION"]);
5+
const ignoreTags = new Set(["JAVASCRIPT", "SPLIT", "SPLITINLINE", "NOBR"]);
6+
7+
/**
 * Handlers keyed by DOM node name. Each handler receives the node and the
 * `writeTo` array and appends the LaTeX fragments for that node to `writeTo`.
 * `processText` looks a node's name up here before trying anything else.
 */
export const processTextFunctions = {
  // Plain text: collapse every whitespace run (including line breaks) into a
  // single space, drop whitespace-only text, and escape LaTeX's comment char.
  "#text": (node, writeTo) => {
    const collapsed = node.nodeValue.replace(/\s+/g, " ");
    if (!collapsed.match(/^\s*$/)) {
      writeTo.push(collapsed.replace(/%/g, "\\%"));
    }
  },

  "B": (node, writeTo) => {
    writeTo.push("\\textbf{");
    recursiveProcessText(node.firstChild, writeTo);
    writeTo.push("}");
  },

  "BLOCKQUOTE": (node, writeTo) => {
    writeTo.push("\n\\begin{quote}");
    recursiveProcessText(node.firstChild, writeTo);
    writeTo.push("\\end{quote}\n");
  },

  // Upper- and lower-case spellings share one implementation.
  "EM": (node, writeTo) => processTextFunctions["em"](node, writeTo),
  "em": (node, writeTo) => {
    writeTo.push("{\\em ");
    recursiveProcessText(node.firstChild, writeTo);
    writeTo.push("}");
  },

  "FIGURE": (node, writeTo) => {
    processFigure(node, writeTo);
  },

  // The `src` attribute is emitted with a trailing ".gif" swapped for ".png"
  // and every underscore written as \string_.
  "IMAGE": (node, writeTo) => {
    writeTo.push("\n\\includegraphics{"
      + node.getAttribute("src").replace(/\.gif$/, ".png").replace(/_/g, "\\string_")
      + "}\n");
  },

  "FOOTNOTE": (node, writeTo) => {
    writeTo.push("\n\\cprotect\\footnote{");
    recursiveProcessText(node.firstChild, writeTo);
    writeTo.push("}\n");
  },

  "INDEX": (node, writeTo) => {
    processIndex(node, writeTo);
  },

  "LABEL": (node, writeTo) => {
    writeTo.push("\\label{"
      + node.getAttribute("NAME")
      + "}\n");
  },

  // Raw LaTeX is copied through without text processing.
  "LATEX": (node, writeTo) => processTextFunctions["LATEXINLINE"](node, writeTo),
  "LATEXINLINE": (node, writeTo) => {
    recursiveProcessPureText(node.firstChild, writeTo);
  },

  // Only the closing brace is written here; the matching opening command is
  // expected to have been emitted already (SUBHEADING below, for example,
  // pushes "\\subsubsection{" and leaves it open).
  "NAME": (node, writeTo) => {
    recursiveProcessText(node.firstChild, writeTo);
    writeTo.push("}\n");
  },

  "OL": (node, writeTo) => {
    writeTo.push("\n\\begin{enumerate}\n");
    processList(node.firstChild, writeTo);
    writeTo.push("\\end{enumerate}\n");
  },

  "P": (node, writeTo) => processTextFunctions["TEXT"](node, writeTo),
  "TEXT": (node, writeTo) => {
    writeTo.push("\n\n");
    recursiveProcessText(node.firstChild, writeTo);
    writeTo.push("\n");
  },

  "QUOTE": (node, writeTo) => {
    writeTo.push("\\enquote{");
    recursiveProcessText(node.firstChild, writeTo);
    writeTo.push("}");
  },

  "REF": (node, writeTo) => {
    writeTo.push("~\\ref{"
      + node.getAttribute("NAME")
      + "}");
  },

  "SCHEMEINLINE": (node, writeTo) => processTextFunctions["JAVASCRIPTINLINE"](node, writeTo),
  "JAVASCRIPTINLINE": (node, writeTo) => {
    // Collect the code first so the delimiter can be chosen afterwards:
    // \lstinline ends at the next occurrence of its delimiter, so a fixed "|"
    // breaks on code containing "|" (e.g. the "||" operator).
    const pieces = [];
    recursiveProcessPureText(node.firstChild, pieces, true);
    const code = pieces.join("");
    // "|" stays the default; the fallback keeps the previous output when
    // every candidate occurs in the code.
    const delimiter = ["|", "/", "+", "!", "@"].find((candidate) => !code.includes(candidate)) || "|";
    writeTo.push("\\lstinline" + delimiter, code, delimiter);
  },

  "SNIPPET": (node, writeTo) => {
    processSnippet(node, writeTo);
  },

  // Leaves the brace open; see the note on "NAME".
  "SUBHEADING": (node, writeTo) => {
    writeTo.push("\\subsubsection{");
    recursiveProcessText(node.firstChild, writeTo);
  },

  "UL": (node, writeTo) => {
    writeTo.push("\n\\begin{itemize}\n");
    processList(node.firstChild, writeTo);
    writeTo.push("\\end{itemize}\n");
  },
};
117+
118+
/**
 * Emits one `\item{...}` per LI node, starting at `node` and following the
 * `nextSibling` chain to its end. Nodes with any other name are skipped.
 *
 * @param {Node} node - First child of the list element (may be null).
 * @param {Array} writeTo - Output fragments; appended to in place.
 */
export const processList = (node, writeTo) => {
  for (let current = node; current; current = current.nextSibling) {
    if (current.nodeName !== "LI") {
      continue;
    }
    writeTo.push("\\item{");
    recursiveProcessText(current.firstChild, writeTo);
    writeTo.push("}\n");
  }
};
127+
128+
/**
 * Writes the first JAVASCRIPT descendant of a snippet node as an
 * `lstlisting` environment; emits nothing when there is none.
 *
 * @param {Element} node - The snippet element.
 * @param {Array} writeTo - Output fragments; appended to in place.
 */
export const processSnippet = (node, writeTo) => {
  const javascriptNodes = node.getElementsByTagName("JAVASCRIPT");
  const firstJavascript = javascriptNodes[0];
  if (!firstJavascript) {
    return;
  }
  writeTo.push("\n\\begin{lstlisting}");
  recursiveProcessPureText(firstJavascript.firstChild, writeTo);
  writeTo.push("\\end{lstlisting}\n");
};
136+
137+
/**
 * Copies the raw text of `node` and all its following siblings to `writeTo`
 * without the escaping applied to ordinary text. Nodes that
 * `replaceTagWithSymbol` handles are left to it; for every other node the
 * `nodeValue` is appended when it has one.
 *
 * @param {Node} node - First node to process (may be null).
 * @param {Array} writeTo - Output fragments; appended to in place.
 * @param {boolean} [removeNewline=false] - When true, each run of line breaks
 *   is replaced by a single space. This now applies to every sibling; it was
 *   previously lost after the first node because the flag was not passed on.
 */
const recursiveProcessPureText = (node, writeTo, removeNewline = false) => {
  for (let current = node; current; current = current.nextSibling) {
    if (replaceTagWithSymbol(current, writeTo)) {
      continue;
    }
    const value = current.nodeValue;
    // Nodes without a value contribute nothing, and calling .replace on null
    // would throw.
    if (value == null) {
      continue;
    }
    writeTo.push(removeNewline ? value.replace(/[\r\n]+/g, " ") : value);
  }
};
148+
149+
/**
 * Runs `processText` on `node` and on each of its following siblings, logging
 * every node that `processText` reports as unhandled.
 *
 * @param {Node} node - First node to process (may be null).
 * @param {Array} writeTo - Output fragments; appended to in place.
 */
export const recursiveProcessText = (node, writeTo) => {
  let current = node;
  while (current) {
    const handled = processText(current, writeTo);
    if (!handled) {
      console.log("recusive process:\n" + current.toString());
    }
    current = current.nextSibling;
  }
};
156+
157+
/**
 * Converts a single node, trying in order: a dedicated handler from
 * `processTextFunctions`, `replaceTagWithSymbol`, the drop list
 * (`tagsToRemove`), and finally the pass-through list (`ignoreTags`), whose
 * children are processed in place of the node itself.
 *
 * @param {Node} node - Node to convert.
 * @param {Array} writeTo - Output fragments; appended to in place.
 * @returns {boolean} true when the node was handled by one of the steps
 *   above, false when its name is unknown to all of them.
 */
export const processText = (node, writeTo) => {
  const name = node.nodeName;

  if (processTextFunctions[name]) {
    processTextFunctions[name](node, writeTo);
    return true;
  }
  if (replaceTagWithSymbol(node, writeTo)) {
    return true;
  }
  if (tagsToRemove.has(name)) {
    return true;
  }
  if (!ignoreTags.has(name)) {
    return false;
  }
  recursiveProcessText(node.firstChild, writeTo);
  return true;
};
173+
174+
/**
 * Writes an `\index{...}` entry for an index node. A SUBINDEX child is
 * introduced with "!" followed by its processed children; every other child
 * goes through `processText`.
 *
 * @param {Node} index - The index node.
 * @param {Array} writeTo - Output fragments; appended to in place.
 */
export const processIndex = (index, writeTo) => {
  writeTo.push("\\index{");
  let child = index.firstChild;
  while (child) {
    if (child.nodeName === "SUBINDEX") {
      writeTo.push("!");
      recursiveProcessText(child.firstChild, writeTo);
    } else {
      processText(child, writeTo);
    }
    child = child.nextSibling;
  }
  writeTo.push("}");
};
190+

0 commit comments

Comments
 (0)