remove docs and update readme

This commit is contained in:
uskovgs 2025-06-08 23:40:21 +03:00
parent 39f4e9bf6c
commit e86ccb4091
9 changed files with 613 additions and 171 deletions

1
.gitignore vendored
View File

@ -4,3 +4,4 @@ __pycache__/
.quarto .quarto
reference/ reference/
tests/ tests/
examples.py

545
README.html Normal file
View File

@ -0,0 +1,545 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.7.30">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title>readme</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
/* CSS for syntax highlighting */
html { -webkit-text-size-adjust: 100%; }
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
}
pre.numberSource { margin-left: 3em; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
</style>
<script src="README_files/libs/clipboard/clipboard.min.js"></script>
<script src="README_files/libs/quarto-html/quarto.js" type="module"></script>
<script src="README_files/libs/quarto-html/tabsets/tabsets.js" type="module"></script>
<script src="README_files/libs/quarto-html/popper.min.js"></script>
<script src="README_files/libs/quarto-html/tippy.umd.min.js"></script>
<script src="README_files/libs/quarto-html/anchor.min.js"></script>
<link href="README_files/libs/quarto-html/tippy.css" rel="stylesheet">
<link href="README_files/libs/quarto-html/quarto-syntax-highlighting-de070a7b0ab54f8780927367ac907214.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="README_files/libs/bootstrap/bootstrap.min.js"></script>
<link href="README_files/libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="README_files/libs/bootstrap/bootstrap-81267100e462c21b3d6c0d5bf76a3417.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
</head>
<body class="fullcontent quarto-light">
<div id="quarto-content" class="page-columns page-rows-contents page-layout-article">
<main class="content" id="quarto-document-content">
<section id="srgweb" class="level1">
<h1>srgweb</h1>
<p>Python client for SRG web services</p>
<section id="installation" class="level2">
<h2 class="anchored" data-anchor-id="installation">Installation</h2>
<p>Install the latest version from the repository:</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode bash code-with-copy"><code class="sourceCode bash"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="ex">pip</span> install git+https://github.com/uskovgs/srgweb/</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="working-with-httpswww.srg.cosmos.rutriton" class="level2">
<h2 class="anchored" data-anchor-id="working-with-httpswww.srg.cosmos.rutriton">Working with https://www.srg.cosmos.ru/triton/</h2>
<p>To avoid entering your password in the terminal, you can store your token securely using the <a href="https://pypi.org/project/keyring/">keyring</a> package:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># ! pip install keyring</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> keyring</span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Save your token (one time)</span></span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>keyring.set_password(<span class="st">"MY_TOKEN_NAME"</span>, <span class="st">"username"</span>, <span class="st">"12345"</span>)</span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="co"># get your password</span></span>
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a>keyring.get_password(<span class="st">"MY_TOKEN_NAME"</span>, <span class="st">"username"</span>)</span>
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a><span class="co"># Out: 12345</span></span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
<p>This way, your password/token is not stored in your scripts or visible in the terminal.</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> srgweb.triton <span class="im">import</span> (</span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a> triton_session, </span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a> list_programs, </span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a> get_program,</span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a> list_baskets,</span>
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a> get_basket</span>
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> keyring</span>
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a><span class="co"># login to triton</span></span>
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a>sess <span class="op">=</span> triton_session(</span>
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a> username <span class="op">=</span> <span class="st">"username"</span>, </span>
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a> password <span class="op">=</span> keyring.get_password(<span class="st">"MY_TOKEN_NAME"</span>, <span class="st">"username"</span>)</span>
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a><span class="co"># list available programs</span></span>
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a>programs <span class="op">=</span> list_programs(sess)</span>
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a><span class="co"># download program SRGA</span></span>
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a>df <span class="op">=</span> get_program(sess, program<span class="op">=</span><span class="st">"SRGA"</span>)</span>
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a><span class="co"># list available baskets</span></span>
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a>baskets <span class="op">=</span> list_baskets(sess)</span>
<span id="cb3-21"><a href="#cb3-21" aria-hidden="true" tabindex="-1"></a><span class="co"># download basket ART-XC agns</span></span>
<span id="cb3-22"><a href="#cb3-22" aria-hidden="true" tabindex="-1"></a>df_basket <span class="op">=</span> get_basket(sess, basket<span class="op">=</span><span class="st">'ART-XC agns'</span>)</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="working-with-httpswww.srg.cosmos.rupublications" class="level2">
<h2 class="anchored" data-anchor-id="working-with-httpswww.srg.cosmos.rupublications">Working with https://www.srg.cosmos.ru/publications/</h2>
<div class="sourceCode" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> srgweb.publications <span class="im">import</span> get_srg_publications</span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Get a list of publications</span></span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a>publications <span class="op">=</span> get_srg_publications()</span></code><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></pre></div>
</section>
<section id="passwords-and-keyring-usage" class="level2">
<h2 class="anchored" data-anchor-id="passwords-and-keyring-usage">Passwords and keyring usage</h2>
</section>
</section>
</main>
<!-- /main column -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
const icon = "";
// Create an AnchorJS instance and add anchor links to every element with
// the 'anchored' class. The links are placed to the right of the element and
// use the `icon` string declared just above.
const anchorJS = new window.AnchorJS();
anchorJS.options = {
placement: 'right',
icon: icon
};
anchorJS.add('.anchored');
// Report whether the element carries at least one CSS class whose name
// begins with 'code-annotation-'.
const isCodeAnnotation = (el) => {
  const classNames = Array.from(el.classList);
  return classNames.some((className) => className.startsWith('code-annotation-'));
}
// Success callback for the ClipboardJS instances: gives the copy button
// visual feedback for one second and then restores its previous state.
//   e - the ClipboardJS success event; `e.trigger` is the clicked button and
//       `e.clearSelection()` drops the text selection made for the copy.
const onCopySuccess = function(e) {
// button target
const button = e.trigger;
// don't keep focus
button.blur();
// flash "checked"
button.classList.add('code-copy-button-checked');
// remember the original title so it can be restored after the flash
var currentTitle = button.getAttribute("title");
button.setAttribute("title", "Copied!");
let tooltip;
// a bootstrap tooltip is shown only when bootstrap is available on window
if (window.bootstrap) {
button.setAttribute("data-bs-toggle", "tooltip");
button.setAttribute("data-bs-placement", "left");
button.setAttribute("data-bs-title", "Copied!");
tooltip = new bootstrap.Tooltip(button,
{ trigger: "manual",
customClass: "code-copy-button-tooltip",
offset: [0, -8]});
tooltip.show();
}
// after 1000 ms undo everything: hide the tooltip (if one was created),
// remove the temporary data-bs-* attributes, restore the title and class
setTimeout(function() {
if (tooltip) {
tooltip.hide();
button.removeAttribute("data-bs-title");
button.removeAttribute("data-bs-toggle");
button.removeAttribute("data-bs-placement");
}
button.setAttribute("title", currentTitle);
button.classList.remove('code-copy-button-checked');
}, 1000);
// clear code selection
e.clearSelection();
}
// Produce the text a copy button places on the clipboard.
//   trigger - the copy button; the element immediately before it is treated
//             as the code element to copy.
// Returns the `innerText` of a clone of that element after its direct
// children that are code annotations (see isCodeAnnotation) have been removed.
// The clone is modified, never the element shown on the page.
const getTextToCopy = function(trigger) {
  const codeEl = trigger.previousElementSibling.cloneNode(true);
  // `children` is a live collection: removing an element while iterating it
  // directly shifts the remaining items down and skips the next sibling, so
  // two adjacent annotation nodes would leave the second one in place.
  // Iterate over a static snapshot instead.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
}
// Wire ClipboardJS to every copy button outside the embedded-source modal.
// The copied text comes from getTextToCopy and a successful copy triggers
// onCopySuccess.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
text: getTextToCopy
});
clipboard.on('success', onCopySuccess);
// When the page contains the embedded source-code modal, buttons inside it
// get a second ClipboardJS instance that uses the modal as its container.
if (window.document.getElementById('quarto-embedded-source-code-modal')) {
const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
text: getTextToCopy,
container: window.document.getElementById('quarto-embedded-source-code-modal')
});
clipboardModal.on('success', onCopySuccess);
}
// Patterns used to decide whether a link is "internal": a localhost URL
// (optional port), a mailto: link, or a URL containing "/<current host>/".
// NOTE(review): the host is inserted into filterRegex without escaping, so
// '.' in the host name matches any character.
var localhostRegex = new RegExp(/^(?:http|https):\/\/localhost\:?[0-9]*\//);
var mailtoRegex = new RegExp(/^mailto:/);
var filterRegex = new RegExp('/' + window.location.host + '/');
// True when href matches any of the three patterns above.
var isInternal = (href) => {
return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
}
// Inspect non-navigation links and adorn them if external
var links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (var i=0; i<links.length; i++) {
const link = links[i];
if (!isInternal(link.href)) {
// undo the damage that might have been done by quarto-nav.js in the case of
// links that we want to consider external
// (only links that carry a data-original-href attribute are rewritten)
if (link.dataset.originalHref !== undefined) {
link.href = link.dataset.originalHref;
}
}
}
// Attach a tippy popup to `el` using the fixed Quarto options below.
//   contentFn     - optional; passed to tippy as `content`
//   onTriggerFn   - optional; passed to tippy as `onTrigger`
//   onUntriggerFn - optional; passed to tippy as `onUntrigger`
// An option key is only present when the matching argument is truthy.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const options = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // the popup is appended to the parent of the element passed by tippy
    appendTo: (target) => target.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
    ...(contentFn ? { content: contentFn } : {}),
    ...(onTriggerFn ? { onTrigger: onTriggerFn } : {}),
    ...(onUntriggerFn ? { onUntrigger: onUntriggerFn } : {}),
  };
  window.tippy(el, options);
}
// Footnote references: each a[role="doc-noteref"] gets a hover popup whose
// content is the innerHTML of the element the reference points at.
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (var i=0; i<noterefs.length; i++) {
const ref = noterefs[i];
tippyHover(ref, function() {
// use id or data attribute instead here
let href = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
// when href parses as an absolute URL keep only its hash; otherwise the
// URL constructor throws and href is used unchanged
try { href = new URL(href).hash; } catch {}
// strip the leading '#' (and an optional '/') to obtain the element id
const id = href.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note) {
return note.innerHTML;
} else {
return "";
}
});
}
// Cross-reference links; they are given popups further below.
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Build the HTML string for a cross-reference popup.
//   id   - id of the referenced element, or null
//   note - the element to render; it is modified in place (classes removed,
//          anchor link removed), so callers that need the original intact
//          must pass a clone
// Returns note.innerHTML, note.outerHTML (for elements with the 'callout'
// class) or, for sections, the innerHTML of a shortened copy.
const processXRef = (id, note) => {
// Strip column container classes
const stripColumnClz = (el) => {
el.classList.remove("page-full", "page-columns");
if (el.children) {
for (const child of el.children) {
stripColumnClz(child);
}
}
}
stripColumnClz(note)
if (id === null || id.startsWith('sec-')) {
// Special case sections, only their first couple elements
const container = document.createElement("div");
if (note.children && note.children.length > 2) {
// keep the first child plus the first following child that is not an
// empty <p>
container.appendChild(note.children[0].cloneNode(true));
for (let i = 1; i < note.children.length; i++) {
const child = note.children[i];
if (child.tagName === "P" && child.innerText === "") {
continue;
} else {
container.appendChild(child.cloneNode(true));
break;
}
}
// typeset math only when the Quarto helper is available
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(container);
}
return container.innerHTML
} else {
// two or fewer children: use the whole element
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
return note.innerHTML;
}
} else {
// Remove any anchor links if they are present
// (querySelector returns only the first match, so at most one is removed)
const anchorLink = note.querySelector('a.anchorjs-link');
if (anchorLink) {
anchorLink.remove();
}
if (window.Quarto?.typesetMath) {
window.Quarto.typesetMath(note);
}
// callouts keep their own wrapper element, everything else only its content
if (note.classList.contains("callout")) {
return note.outerHTML;
} else {
return note.innerHTML;
}
}
}
// Cross-reference popups: the content is resolved when the popup is
// triggered. The tippy instance is disabled while the content is being
// resolved and is re-enabled and shown once resolution finishes.
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
// absolute URL: take its hash; a URL that fails to parse leaves hash undefined
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
// strip the leading '#' (and an optional '/') to obtain the element id
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
// target is on this page: render a clone so the page itself is untouched
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
// (target is not on this page: download the URL without its hash and
// look the id up in the parsed response)
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
instance.enable();
instance.show();
});
}
}, function(instance) {
// intentionally empty onUntrigger callback
});
}
// The annotation <dt> whose code lines are currently highlighted, or undefined.
let selectedAnnoteEl;
// Build the CSS selector for the <span> that carries the given
// data-code-cell and data-code-annotation attribute values.
const selectorForAnnotation = ( cell, annotation) => {
  const attributeSelector = (name, value) => '[' + name + '="' + value + '"]';
  return 'span'
    + attributeSelector('data-code-cell', cell)
    + attributeSelector('data-code-annotation', annotation);
}
// Highlight the code lines that belong to an annotation.
//   annoteEl - element whose data-target-cell / data-target-annotation
//              attributes identify the annotated span
// Positions (creating them on first use) two absolutely positioned divs, one
// over the code lines and one in the cell's annotation gutter, and records
// annoteEl in `selectedAnnoteEl`.
const selectCodeLines = (annoteEl) => {
// NOTE(review): `doc` is never used below
const doc = window.document;
const targetCell = annoteEl.getAttribute("data-target-cell");
const targetAnnotation = annoteEl.getAttribute("data-target-annotation");
const annoteSpan = window.document.querySelector(selectorForAnnotation(targetCell, targetAnnotation));
// data-code-lines is a comma-separated list; each entry becomes the element
// id "<cell>-<entry>"
const lines = annoteSpan.getAttribute("data-code-lines").split(",");
const lineIds = lines.map((line) => {
return targetCell + "-" + line;
})
let top = null;
let height = null;
let parent = null;
if (lineIds.length > 0) {
//compute the position of the single el (top and bottom and make a div)
const el = window.document.getElementById(lineIds[0]);
top = el.offsetTop;
height = el.offsetHeight;
parent = el.parentElement.parentElement;
// with several lines the highlight spans from the first line's top to the
// last line's bottom
if (lineIds.length > 1) {
const lastEl = window.document.getElementById(lineIds[lineIds.length - 1]);
const bottom = lastEl.offsetTop + lastEl.offsetHeight;
height = bottom - top;
}
if (top !== null && height !== null && parent !== null) {
// cook up a div (if necessary) and position it
let div = window.document.getElementById("code-annotation-line-highlight");
if (div === null) {
div = window.document.createElement("div");
div.setAttribute("id", "code-annotation-line-highlight");
div.style.position = 'absolute';
parent.appendChild(div);
}
// the highlight extends 2px above and below the measured lines
div.style.top = top - 2 + "px";
div.style.height = height + 4 + "px";
div.style.left = 0;
// same treatment for the marker inside the cell's annotation gutter
let gutterDiv = window.document.getElementById("code-annotation-line-highlight-gutter");
if (gutterDiv === null) {
gutterDiv = window.document.createElement("div");
gutterDiv.setAttribute("id", "code-annotation-line-highlight-gutter");
gutterDiv.style.position = 'absolute';
const codeCell = window.document.getElementById(targetCell);
const gutter = codeCell.querySelector('.code-annotation-gutter');
gutter.appendChild(gutterDiv);
}
gutterDiv.style.top = top - 2 + "px";
gutterDiv.style.height = height + 4 + "px";
}
selectedAnnoteEl = annoteEl;
}
};
// Remove both highlight overlays (code lines and gutter) when they exist and
// forget the currently selected annotation.
const unselectCodeLines = () => {
  for (const overlayId of ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"]) {
    const overlay = window.document.getElementById(overlayId);
    if (overlay) {
      overlay.remove();
    }
  }
  selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle
// On window resize (throttled with a 10 ms delay) recompute the highlight
// position for the currently selected annotation, if any.
window.addEventListener(
"resize",
throttle(() => {
// NOTE(review): `elRect` is not declared anywhere in the visible script, so
// this assignment targets an implicit global — confirm it is still needed
elRect = undefined;
if (selectedAnnoteEl) {
selectCodeLines(selectedAnnoteEl);
}
}, 10)
);
// Wrap `fn` so that the first call runs immediately while later calls are
// deferred: each one restarts an `ms` millisecond timer, and when the timer
// finally fires `fn` runs with the most recent arguments and the wrapper
// returns to its initial state.
function throttle(fn, ms) {
  let engaged = false;
  let pending;
  return (...args) => {
    if (engaged) {
      // a call is already in flight: replace any scheduled run with a new one
      if (pending) clearTimeout(pending);
      pending = setTimeout(() => {
        fn.apply(this, args);
        pending = engaged = false;
      }, ms);
      return;
    }
    // first call gets through untouched
    fn.apply(this, args);
    engaged = true;
  };
}
// Attach click handler to the DT
// Clicking an annotation term toggles its highlight: a new selection replaces
// the previous one, and clicking the selected term again clears it.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
for (const annoteDlNode of annoteDls) {
annoteDlNode.addEventListener('click', (event) => {
const clickedEl = event.target;
if (clickedEl !== selectedAnnoteEl) {
// drop the previous highlight and its active marker before selecting
unselectCodeLines();
const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
if (activeEl) {
activeEl.classList.remove('code-annotation-active');
}
selectCodeLines(clickedEl);
clickedEl.classList.add('code-annotation-active');
} else {
// Unselect the line
unselectCodeLines();
clickedEl.classList.remove('code-annotation-active');
}
});
}
// Walk up from `el` until an ancestor with a non-empty data-cites attribute
// is found. Returns { el, cites } where `el` is the child of that ancestor on
// the path walked and `cites` is the attribute split on single spaces;
// returns undefined when no ancestor carries the attribute.
const findCites = (el) => {
  let node = el;
  while (node.parentElement) {
    const citeList = node.parentElement.dataset.cites;
    if (citeList) {
      return {
        el: node,
        cites: citeList.split(' ')
      };
    }
    node = node.parentElement;
  }
  return undefined;
};
// Bibliography references: for each a[role="doc-biblioref"] that sits under an
// element with data-cites, attach a hover popup listing the cited entries.
var bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (var i=0; i<bibliorefs.length; i++) {
const ref = bibliorefs[i];
const citeInfo = findCites(ref);
if (citeInfo) {
tippyHover(citeInfo.el, function() {
// build one 'hanging-indent csl-entry' div per cite key, filled from the
// page element with id "ref-<key>"; a key without such an element yields an
// empty div
var popup = window.document.createElement('div');
citeInfo.cites.forEach(function(cite) {
var citeDiv = window.document.createElement('div');
citeDiv.classList.add('hanging-indent');
citeDiv.classList.add('csl-entry');
var biblioDiv = window.document.getElementById('ref-' + cite);
if (biblioDiv) {
citeDiv.innerHTML = biblioDiv.innerHTML;
}
popup.appendChild(citeDiv);
});
return popup.innerHTML;
});
}
}
});
</script>
</div> <!-- /content -->
</body></html>

View File

@ -1,10 +1,12 @@
# srgweb: Python client for SRG web services # srgweb
Python client for SRG web services: triton and publications.
## Installation ## Installation
Install the latest version from the repository: Install the latest version from the repository:
```bash ``` bash
pip install git+https://github.com/uskovgs/srgweb/ pip install git+https://github.com/uskovgs/srgweb/
``` ```
@ -18,23 +20,45 @@ from srgweb.triton import (
list_baskets, list_baskets,
get_basket get_basket
) )
import keyring
# login to triton # login to triton
sess = triton_session("uskov", keyring.get_password("PLAN_SRG", "")) session = triton_session("username", "password")
# list available programs # list available programs
programs = list_programs(sess) programs = list_programs(session)
# download program SRGA
df = get_program(sess, program="SRGA") # download program "SRGA" (case insensitive)
df = get_program(session, program="srga")
# list available baskets # list available baskets
baskets = list_baskets(sess) baskets = list_baskets(session)
# download basket ART-XC agns # download basket ART-XC agns
df_basket = get_basket(sess, basket='ART-XC agns') df_basket = get_basket(session, basket='ART-XC agns')
``` ```
To avoid entering your password in the terminal, you can store your token securely using the [keyring](https://pypi.org/project/keyring/) package:
``` python
# ! pip install keyring
import keyring
# Save your token (one time)
keyring.set_password("MY_TOKEN_NAME", "username", "12345")
# Now you can use the keyring to get your password/token in your script
from srgweb.triton import triton_session
session = triton_session(
username = "username",
password = keyring.get_password("MY_TOKEN_NAME", "username")
)
```
This way, your password/token is not stored in your scripts or visible in the terminal.
## Working with https://www.srg.cosmos.ru/publications/ ## Working with https://www.srg.cosmos.ru/publications/
```python ``` python
from srgweb.publications import get_srg_publications from srgweb.publications import get_srg_publications
# Get a list of publications # Get a list of publications

View File

@ -1,10 +0,0 @@
quartodoc:
style: pkgdown
dir: reference
package: quartodoc
sections:
- title: Some functions
desc: Functions to inspect docstrings.
contents:
- get_object
- preview

View File

@ -1,13 +1,18 @@
[project] [project]
name = "srgweb" name = "srgweb"
version = "0.1.0" version = "0.1.0"
description = "python interface to internal web services" description = "Python client for SRG web services"
authors = [ authors = [
{name = "uskovgs",email = "uskov@cosmos.ru"} {name = "uskovgs",email = "uskov@cosmos.ru"}
] ]
readme = "README.md" readme = "README.md"
requires-python = ">=3.12" requires-python = ">=3.9"
dependencies = [ dependencies = [
"requests",
"beautifulsoup4",
"pandas",
"rich",
"pyjanitor"
] ]

View File

@ -1,41 +0,0 @@
# get_object { #quartodoc.get_object }
```python
get_object(
path,
object_name=None,
parser='numpy',
load_aliases=True,
dynamic=False,
loader=None,
)
```
Fetch a griffe object.
## Parameters {.doc-section .doc-section-parameters}
| Name | Type | Description | Default |
|--------------|-----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|------------|
| path | str | An import path to the object. This should have the form `path.to.module:object`. For example, `quartodoc:get_object` or `quartodoc:MdRenderer.render`. | _required_ |
| object_name | \'str \| None\' | (Deprecated). A function name. | `None` |
| parser | str | A docstring parser to use. | `'numpy'` |
| load_aliases | | For aliases that were imported from other modules, should we load that module? | `True` |
| dynamic | | Whether to dynamically import object. Useful if docstring is not hard-coded, but was set on object by running python code. | `False` |
## See Also {.doc-section .doc-section-see-also}
preview: print a user-friendly preview of a griffe object.
## Examples {.doc-section .doc-section-examples}
```python
>>> get_function("quartodoc", "get_function")
<Function('get_function', ...
```
## Returns {.doc-section .doc-section-returns}
| Name | Type | Description |
|--------|-----------|---------------|
| x | dc.Object | abc |

View File

@ -1,10 +0,0 @@
# Function reference {.doc .doc-index}
## Some functions
Functions to inspect docstrings.
| | |
| --- | --- |
| [get_object](get_object.qmd#quartodoc.get_object) | Fetch a griffe object. |
| [preview](preview.qmd#quartodoc.preview) | Print a friendly representation of a griffe object (e.g. function, docstring) |

View File

@ -1,24 +0,0 @@
# preview { #quartodoc.preview }
```python
preview(ast, max_depth=999, compact=False, as_string=False)
```
Print a friendly representation of a griffe object (e.g. function, docstring)
## Examples {.doc-section .doc-section-examples}
```python
>>> from quartodoc import get_object
>>> obj = get_object("quartodoc", "get_object")
```
```python
>>> preview(obj.docstring.parsed)
...
```
```python
>>> preview(obj)
...
```

View File

@ -1,62 +1,33 @@
"""Utility functions to scrape SRG publication & telegram pages.
Dependencies
------------
- requests
- beautifulsoup4
- pandas
- rich (optional, for nice progress)
Example
-------
>>> from srg_publications import (
... parse_srg_paper_links,
... get_df_from_srg_papers,
... get_df_from_srg_telegrams,
... )
>>> df_papers = get_df_from_srg_papers()
>>> df_tg = get_df_from_srg_telegrams()
"""
from __future__ import annotations
import re import re
from typing import List, Dict
import requests import requests
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup
import pandas as pd import pandas as pd
from rich.progress import Progress, SpinnerColumn, TextColumn from rich.console import Console
BASE_SITE = "https://www.srg.cosmos.ru" BASE_SITE = "https://www.srg.cosmos.ru"
PUBLICATIONS_URL = f"{BASE_SITE}/publications/" PUBLICATIONS_URL = f"{BASE_SITE}/publications/"
TELEGRAMS_ATEL_URL = f"{BASE_SITE}/publications/telegrams/atel"
TELEGRAMS_GCN_URL = f"{BASE_SITE}/publications/telegrams/gcn" console = Console()
def clear_arxiv_link(arxiv_abs_link: str | None) -> str | None: def _clear_arxiv_link(arxiv_abs_link: str | None) -> str | None:
"""Normalise an arXiv *abs* URL into canonical `<id>` form. """Normalise an arXiv *abs* URL into canonical `<id>` form.
Examples Examples
-------- --------
>>> clear_arxiv_link("https://arxiv.org/abs/2301.01234v2") >>> _clear_arxiv_link("https://arxiv.org/abs/2301.01234v2")
'2301.01234' '2301.01234'
>>> clear_arxiv_link("arXiv:2209.00001v1") >>> _clear_arxiv_link("arXiv:2209.00001v1")
'2209.00001' '2209.00001'
""" """
if not arxiv_abs_link: if not arxiv_abs_link:
return None return None
# remove version suffix like v2
cleaned = re.sub(r"v\d+$", "", arxiv_abs_link.strip()) cleaned = re.sub(r"v\d+$", "", arxiv_abs_link.strip())
# remove protocol and prefix
cleaned = re.sub(r"https?://arxiv\.org/abs/", "", cleaned) cleaned = re.sub(r"https?://arxiv\.org/abs/", "", cleaned)
cleaned = cleaned.replace("arXiv:", "") cleaned = cleaned.replace("arXiv:", "")
return cleaned return cleaned
# ----------------------------------------------------------------------------
# 1. Publication list helpers
# ----------------------------------------------------------------------------
def _session_for(url: str) -> requests.Session: def _session_for(url: str) -> requests.Session:
sess = requests.Session() sess = requests.Session()
sess.headers.update({ sess.headers.update({
@ -67,7 +38,7 @@ def _session_for(url: str) -> requests.Session:
return sess return sess
def parse_srg_paper_links(page_url: str) -> dict[str, str | None]: def _parse_srg_paper_links(page_url: str) -> dict[str, str | None]:
"""Parse individual SRG paper page and return arXiv + ADS links. """Parse individual SRG paper page and return arXiv + ADS links.
Parameters Parameters
@ -84,16 +55,15 @@ def parse_srg_paper_links(page_url: str) -> dict[str, str | None]:
soup = BeautifulSoup(sess.get(page_url).text, "html.parser") soup = BeautifulSoup(sess.get(page_url).text, "html.parser")
paper_links = [a.get("href") for a in soup.select("li a[href]")] paper_links = [a.get("href") for a in soup.select("li a[href]")]
arxiv_link = next((l for l in paper_links if "arxiv.org/abs" in l), None) arxiv_link = next((link for link in paper_links if link and "arxiv.org/abs" in link), None)
adsabs_link = next((l for l in paper_links if "ui.adsabs.harvard.edu" in l), None) adsabs_link = next((link for link in paper_links if link and "ui.adsabs.harvard.edu" in link), None)
return { return {
"srg_arxiv_url": arxiv_link, "srg_arxiv_url": arxiv_link,
"srg_bibcode": adsabs_link, "srg_bibcode": adsabs_link,
} }
def get_srg_publications(progress: bool = True) -> pd.DataFrame: def get_srg_publications() -> pd.DataFrame:
"""Scrape the main publications page and return a DataFrame. """Scrape the main publications page and return a DataFrame.
Columns Columns
@ -106,50 +76,32 @@ def get_srg_publications(progress: bool = True) -> pd.DataFrame:
""" """
sess = _session_for(PUBLICATIONS_URL) sess = _session_for(PUBLICATIONS_URL)
soup = BeautifulSoup(sess.get(PUBLICATIONS_URL).text, "html.parser") with console.status("Loading SRG publications page", spinner="dots"):
soup = BeautifulSoup(sess.get(PUBLICATIONS_URL).text, "html.parser")
# Remove buttons that interfere with finding <a> # Remove buttons that interfere with finding <a>
for btn in soup.select(".btn"): for btn in soup.select(".btn"):
btn.decompose() btn.decompose()
anchors = soup.select("tbody a") anchors = soup.select("tbody a")
titles: List[str] = [a.select_one("strong").text.strip() for a in anchors] titles: list[str] = [a.select_one("strong").text.strip() for a in anchors]
page_urls: List[str] = [BASE_SITE + a.get("href") for a in anchors] page_urls: list[str] = [BASE_SITE + a.get("href") for a in anchors]
iterator = zip(titles, page_urls) iterator = zip(titles, page_urls)
records: List[Dict[str, str | None]] = [] records: list[dict[str, str | None]] = []
if progress:
bar = Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"), transient=True)
task_desc = "Parsing arXiv/ADS links"
with bar:
t = bar.add_task(task_desc, total=len(titles))
for title, link in iterator:
links = parse_srg_paper_links(link)
rec = {
"title_srg": title,
"page_srg": link,
**links,
}
rec["srg_arxiv"] = clear_arxiv_link(rec["srg_arxiv_url"])
if rec["srg_bibcode"]:
rec["srg_bibcode"] = re.sub(r"https?://ui\.adsabs\.harvard\.edu/abs/", "", rec["srg_bibcode"])
records.append(rec)
bar.update(t, advance=1)
else:
for title, link in iterator: for title, link in iterator:
links = parse_srg_paper_links(link) links = _parse_srg_paper_links(link)
rec = { rec = {
"title_srg": title, "title_srg": title,
"page_srg": link, "page_srg": link,
**links, **links,
} }
rec["srg_arxiv"] = clear_arxiv_link(rec["srg_arxiv_url"]) rec["srg_arxiv"] = _clear_arxiv_link(rec["srg_arxiv_url"])
if rec["srg_bibcode"]: if rec["srg_bibcode"]:
rec["srg_bibcode"] = re.sub(r"https?://ui\.adsabs\.harvard\.edu/abs/", "", rec["srg_bibcode"]) rec["srg_bibcode"] = re.sub(r"https?://ui\.adsabs\.harvard\.edu/abs/", "", rec["srg_bibcode"])
records.append(rec) records.append(rec)
console.print(f"[green]✔[/green] Loaded {len(records)} publications from SRG site")
return pd.DataFrame.from_records(records) return pd.DataFrame.from_records(records)