public inbox archive for pandoc-discuss@googlegroups.com
 help / color / mirror / Atom feed
* LaTeX: parse thebibliography (patch)
@ 2022-01-13 17:58 Игорь Пашев
       [not found] ` <415779ca-0946-47b5-b15e-a82c2d99d168n-/JYPxA39Uh5TLH3MbocFFw@public.gmane.org>
  0 siblings, 1 reply; 2+ messages in thread
From: Игорь Пашев @ 2022-01-13 17:58 UTC (permalink / raw)
  To: pandoc-discuss


[-- Attachment #1.1: Type: text/plain, Size: 654 bytes --]

Here is a patch which makes Pandoc parse the LaTeX thebibliography 
environment into a definition list. The patch includes a test showing the result.

I needed it for myself for self-contained LaTeX documents and hope somebody 
may find it useful too.

-- 
You received this message because you are subscribed to the Google Groups "pandoc-discuss" group.
To unsubscribe from this group and stop receiving emails from it, send an email to pandoc-discuss+unsubscribe-/JYPxA39Uh5TLH3MbocFF+G/Ez6ZCGd0@public.gmane.org
To view this discussion on the web visit https://groups.google.com/d/msgid/pandoc-discuss/415779ca-0946-47b5-b15e-a82c2d99d168n%40googlegroups.com.

[-- Attachment #1.2: Type: text/html, Size: 980 bytes --]

[-- Attachment #2: thebibliography.patch --]
[-- Type: text/x-patch, Size: 4881 bytes --]

From 12cce1758ff82cced76fa1961076f99176ea2689 Mon Sep 17 00:00:00 2001
From: Igor Pashev <pashev.igor-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date: Thu, 13 Jan 2022 19:11:50 +0200
Subject: LaTeX: parse thebibliography

---
 src/Text/Pandoc/Readers/LaTeX.hs         | 32 +++++++++++++++++++++
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  2 ++
 test/command/latex-thebibliography.md    | 48 ++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+)
 create mode 100644 test/command/latex-thebibliography.md

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 20a2db76b..e4a3aaa58 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -741,6 +741,14 @@ looseItem = do
   skipopts
   return mempty
 
+looseBibItem :: PandocMonad m => LP m Blocks
+looseBibItem = do
+  inListItem <- sInListItem <$> getState
+  guard $ not inListItem
+  skipopts
+  void braced
+  return mempty
+
 epigraph :: PandocMonad m => LP m Blocks
 epigraph = do
   p1 <- grouped block
@@ -886,6 +894,7 @@ blockCommands = M.fromList
    , ("strut", pure mempty)
    , ("rule", rule)
    , ("item", looseItem)
+   , ("bibitem", looseBibItem)
    , ("documentclass", skipopts *> braced *> preamble)
    , ("centerline", para . trimInlines <$> (skipopts *> tok))
    , ("caption", mempty <$ setCaption inline)
@@ -975,6 +984,7 @@ environments = M.union (tableEnvironments blocks inline) $
    , ("togglefalse", braced >>= setToggle False)
    , ("iftoggle", try $ ifToggle >> block)
    , ("CSLReferences", braced >> braced >> env "CSLReferences" blocks)
+   , ("thebibliography", theBibliography)
    ]
 
 filecontents :: PandocMonad m => LP m Blocks
@@ -1211,6 +1221,28 @@ descItem = do
   bs <- blocks
   return (ils, [bs])
 
+bibItem :: PandocMonad m => LP m (Inlines, [Blocks])
+bibItem = do
+  blocks
+  controlSeq "bibitem"
+  sp
+  lbl <- opt <|> nextCite
+  cite_key <- untokenize <$> braced
+  bs <- blocks
+  return (lbl, [divWith (cite_key, [], []) bs])
+  where
+    nextCite = do
+      st <- getState
+      let n = sTheBibItemNum st + 1
+      setState st {sTheBibItemNum = n}
+      return . singleton . Str . T.pack . show $ n
+
+theBibliography :: PandocMonad m => LP m Blocks
+theBibliography =
+  divWith ("", ["thebibliography"], []) . definitionList <$>
+  listenv "thebibliography" (many bibItem)
+
+
 listenv :: PandocMonad m => Text -> LP m a -> LP m a
 listenv name p = try $ do
   oldInListItem <- sInListItem `fmap` getState
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 9eb4a0cbc..8fb6bd5bc 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -172,6 +172,7 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sFileContents  :: M.Map Text Text
                             , sEnableWithRaw :: Bool
                             , sRawTokens     :: IntMap.IntMap [Tok]
+                            , sTheBibItemNum :: Int
                             }
      deriving Show
 
@@ -199,6 +200,7 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sFileContents  = M.empty
                               , sEnableWithRaw = True
                               , sRawTokens     = IntMap.empty
+                              , sTheBibItemNum = 0
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
diff --git a/test/command/latex-thebibliography.md b/test/command/latex-thebibliography.md
new file mode 100644
index 000000000..153fbfc13
--- /dev/null
+++ b/test/command/latex-thebibliography.md
@@ -0,0 +1,48 @@
+# The bibliography
+
+```
+% pandoc -f latex -t native
+\begin{thebibliography}{100}
+  \bibitem[One1990]{one} The First.
+  \bibitem{two} The Second.
+  \bibitem[Three 1998]{three} The Third.
+  \bibitem{four} The Fourth.
+\end{thebibliography}
+^D
+[ Div
+    ( "" , [ "thebibliography" ] , [] )
+    [ DefinitionList
+        [ ( [ Str "One1990" ]
+          , [ [ Div
+                  ( "one" , [] , [] )
+                  [ Para [ Str "The" , Space , Str "First." ] ]
+              ]
+            ]
+          )
+        , ( [ Str "1" ]
+          , [ [ Div
+                  ( "two" , [] , [] )
+                  [ Para [ Str "The" , Space , Str "Second." ] ]
+              ]
+            ]
+          )
+        , ( [ Str "Three" , Space , Str "1998" ]
+          , [ [ Div
+                  ( "three" , [] , [] )
+                  [ Para [ Str "The" , Space , Str "Third." ] ]
+              ]
+            ]
+          )
+        , ( [ Str "2" ]
+          , [ [ Div
+                  ( "four" , [] , [] )
+                  [ Para [ Str "The" , Space , Str "Fourth." ] ]
+              ]
+            ]
+          )
+        ]
+    ]
+]
+
+```
+

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: LaTeX: parse thebibliography (patch)
       [not found] ` <415779ca-0946-47b5-b15e-a82c2d99d168n-/JYPxA39Uh5TLH3MbocFFw@public.gmane.org>
@ 2022-01-16 18:59   ` Игорь Пашев
  0 siblings, 0 replies; 2+ messages in thread
From: Игорь Пашев @ 2022-01-16 18:59 UTC (permalink / raw)
  To: pandoc-discuss


[-- Attachment #1.1: Type: text/plain, Size: 916 bytes --]

Here is an updated version which produces an ordered list instead of a 
definition list when possible, i.e. when no \bibitem has an explicit label.
Ordered lists are easier to render in other formats.

четверг, 13 января 2022 г. в 19:58:07 UTC+2, Игорь Пашев: 

> Here is a patch which makes Pandoc parse the bibliography environment into 
> a definition list. The patch includes a test showing the result.
>
> I needed it for myself for self-contained LaTeX documents and hope 
> somebody may find it useful too.
>

-- 
You received this message because you are subscribed to the Google Groups "pandoc-discuss" group.
To unsubscribe from this group and stop receiving emails from it, send an email to pandoc-discuss+unsubscribe-/JYPxA39Uh5TLH3MbocFF+G/Ez6ZCGd0@public.gmane.org
To view this discussion on the web visit https://groups.google.com/d/msgid/pandoc-discuss/8296a3c5-bd71-4b2c-8498-11903d7a0194n%40googlegroups.com.

[-- Attachment #1.2: Type: text/html, Size: 1451 bytes --]

[-- Attachment #2: thebibliography-dl-ol.patch --]
[-- Type: text/x-patch, Size: 5112 bytes --]

From f62f8b7ef2f1c2357fbd41f5226fe433e632e042 Mon Sep 17 00:00:00 2001
From: Igor Pashev <pashev.igor-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Date: Thu, 13 Jan 2022 19:11:50 +0200
Subject: LaTeX: parse thebibliography

---
 src/Text/Pandoc/Readers/LaTeX.hs         | 38 +++++++++++++++++++++++++
 src/Text/Pandoc/Readers/LaTeX/Parsing.hs |  2 ++
 test/command/latex-thebibliography.md    | 49 ++++++++++++++++++++++++++++++++
 3 files changed, 89 insertions(+)
 create mode 100644 test/command/latex-thebibliography.md

diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs
index 20a2db76b..37fa4adf0 100644
--- a/src/Text/Pandoc/Readers/LaTeX.hs
+++ b/src/Text/Pandoc/Readers/LaTeX.hs
@@ -741,6 +741,14 @@ looseItem = do
   skipopts
   return mempty
 
+looseBibItem :: PandocMonad m => LP m Blocks
+looseBibItem = do
+  inListItem <- sInListItem <$> getState
+  guard $ not inListItem
+  skipopts
+  void braced
+  return mempty
+
 epigraph :: PandocMonad m => LP m Blocks
 epigraph = do
   p1 <- grouped block
@@ -886,6 +894,7 @@ blockCommands = M.fromList
    , ("strut", pure mempty)
    , ("rule", rule)
    , ("item", looseItem)
+   , ("bibitem", looseBibItem)
    , ("documentclass", skipopts *> braced *> preamble)
    , ("centerline", para . trimInlines <$> (skipopts *> tok))
    , ("caption", mempty <$ setCaption inline)
@@ -975,6 +984,7 @@ environments = M.union (tableEnvironments blocks inline) $
    , ("togglefalse", braced >>= setToggle False)
    , ("iftoggle", try $ ifToggle >> block)
    , ("CSLReferences", braced >> braced >> env "CSLReferences" blocks)
+   , ("thebibliography", theBibliography)
    ]
 
 filecontents :: PandocMonad m => LP m Blocks
@@ -1211,6 +1221,34 @@ descItem = do
   bs <- blocks
   return (ils, [bs])
 
+bibItem :: PandocMonad m => LP m (Inlines, [Blocks])
+bibItem = do
+  blocks
+  controlSeq "bibitem"
+  sp
+  lbl <- opt <|> nextNum
+  cite_key <- untokenize <$> braced
+  bs <- blocks
+  return (lbl, [divWith (cite_key, [], []) bs])
+  where
+    nextNum = do
+      st <- getState
+      let n = sTheBibItemNum st + 1
+      setState st {sTheBibItemNum = n}
+      return . str . T.pack . show $ n
+
+theBibliography :: PandocMonad m => LP m Blocks
+theBibliography = do
+  updateState $ \st -> st {sTheBibItemNum = 0}
+  items <- listenv "thebibliography" (many bibItem)
+  is_ol <- (== length items) . sTheBibItemNum <$> getState
+  return $
+    divWith
+      ("", ["thebibliography"], [])
+      (if is_ol
+         then orderedListWith (1, Decimal, Period) $ map (head . snd) items
+         else definitionList items)
+
 listenv :: PandocMonad m => Text -> LP m a -> LP m a
 listenv name p = try $ do
   oldInListItem <- sInListItem `fmap` getState
diff --git a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
index 9eb4a0cbc..8fb6bd5bc 100644
--- a/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
+++ b/src/Text/Pandoc/Readers/LaTeX/Parsing.hs
@@ -172,6 +172,7 @@ data LaTeXState = LaTeXState{ sOptions       :: ReaderOptions
                             , sFileContents  :: M.Map Text Text
                             , sEnableWithRaw :: Bool
                             , sRawTokens     :: IntMap.IntMap [Tok]
+                            , sTheBibItemNum :: Int
                             }
      deriving Show
 
@@ -199,6 +200,7 @@ defaultLaTeXState = LaTeXState{ sOptions       = def
                               , sFileContents  = M.empty
                               , sEnableWithRaw = True
                               , sRawTokens     = IntMap.empty
+                              , sTheBibItemNum = 0
                               }
 
 instance PandocMonad m => HasQuoteContext LaTeXState m where
diff --git a/test/command/latex-thebibliography.md b/test/command/latex-thebibliography.md
new file mode 100644
index 000000000..54b257c61
--- /dev/null
+++ b/test/command/latex-thebibliography.md
@@ -0,0 +1,49 @@
+# The bibliography
+
+```
+% pandoc -f latex -t native
+\begin{thebibliography}{10}
+  \bibitem{two} The Second.
+  \bibitem{four} The Fourth.
+\end{thebibliography}
+\begin{thebibliography}{100}
+  \bibitem[One1990]{one} The First.
+  \bibitem{two} The Second.
+\end{thebibliography}
+^D
+[ Div
+    ( "" , [ "thebibliography" ] , [] )
+    [ OrderedList
+        ( 1 , Decimal , Period )
+        [ [ Div
+              ( "two" , [] , [] )
+              [ Para [ Str "The" , Space , Str "Second." ] ]
+          ]
+        , [ Div
+              ( "four" , [] , [] )
+              [ Para [ Str "The" , Space , Str "Fourth." ] ]
+          ]
+        ]
+    ]
+, Div
+    ( "" , [ "thebibliography" ] , [] )
+    [ DefinitionList
+        [ ( [ Str "One1990" ]
+          , [ [ Div
+                  ( "one" , [] , [] )
+                  [ Para [ Str "The" , Space , Str "First." ] ]
+              ]
+            ]
+          )
+        , ( [ Str "1" ]
+          , [ [ Div
+                  ( "two" , [] , [] )
+                  [ Para [ Str "The" , Space , Str "Second." ] ]
+              ]
+            ]
+          )
+        ]
+    ]
+]
+```
+

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-01-16 18:59 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-13 17:58 LaTeX: parse thebibliography (patch) Игорь Пашев
     [not found] ` <415779ca-0946-47b5-b15e-a82c2d99d168n-/JYPxA39Uh5TLH3MbocFFw@public.gmane.org>
2022-01-16 18:59   ` Игорь Пашев

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).