complete srgweb.triton module
This commit is contained in:
commit
cac9701e3b
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
__pycache__/
|
||||||
|
.quarto
|
10
_quarto.yml
Normal file
10
_quarto.yml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
quartodoc:
|
||||||
|
style: pkgdown
|
||||||
|
dir: reference
|
||||||
|
package: quartodoc
|
||||||
|
sections:
|
||||||
|
- title: Some functions
|
||||||
|
desc: Functions to inspect docstrings.
|
||||||
|
contents:
|
||||||
|
- get_object
|
||||||
|
- preview
|
20
examples.py
Normal file
20
examples.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
from srgweb.triton import (
    triton_session,
    list_programs,
    get_program,
    list_baskets,
    get_basket
)
import keyring

# login to triton (password is fetched from the system keyring, not hard-coded)
sess = triton_session("uskov", keyring.get_password("PLAN_SRG", ""))

# list available programs
programs = list_programs(sess)

# download program SRGA
# (bug fix: the original line ended with a stray "s" — a SyntaxError)
df = get_program(sess, program="SRGA")

# list available baskets
baskets = list_baskets(sess)

# download basket ART-XC agns
df_basket = get_basket(sess, basket='ART-XC agns')
|
||||||
|
|
1
objects.json
Normal file
1
objects.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{"project": "quartodoc", "version": "0.0.9999", "count": 4, "items": [{"name": "quartodoc.get_object", "domain": "py", "role": "function", "priority": "1", "uri": "reference/get_object.html#quartodoc.get_object", "dispname": "-"}, {"name": "quartodoc.autosummary.get_object", "domain": "py", "role": "function", "priority": "1", "uri": "reference/get_object.html#quartodoc.get_object", "dispname": "quartodoc.get_object"}, {"name": "quartodoc.preview", "domain": "py", "role": "function", "priority": "1", "uri": "reference/preview.html#quartodoc.preview", "dispname": "-"}, {"name": "quartodoc.ast.preview", "domain": "py", "role": "function", "priority": "1", "uri": "reference/preview.html#quartodoc.preview", "dispname": "quartodoc.preview"}]}
|
16
pyproject.toml
Normal file
16
pyproject.toml
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
[project]
|
||||||
|
name = "srgweb"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = "python interface to internal web services"
|
||||||
|
authors = [
|
||||||
|
{name = "uskovgs",email = "uskov@cosmos.ru"}
|
||||||
|
]
|
||||||
|
readme = "README.md"
|
||||||
|
requires-python = ">=3.12"
|
||||||
|
dependencies = [
    "requests",
    "beautifulsoup4",
    "pandas",
    "rich",
    "pyjanitor",
    "astropy",
    "lxml",
]
|
||||||
|
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
41
reference/get_object.qmd
Normal file
41
reference/get_object.qmd
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
# get_object { #quartodoc.get_object }
|
||||||
|
|
||||||
|
```python
|
||||||
|
get_object(
|
||||||
|
path,
|
||||||
|
object_name=None,
|
||||||
|
parser='numpy',
|
||||||
|
load_aliases=True,
|
||||||
|
dynamic=False,
|
||||||
|
loader=None,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Fetch a griffe object.
|
||||||
|
|
||||||
|
## Parameters {.doc-section .doc-section-parameters}
|
||||||
|
|
||||||
|
| Name | Type | Description | Default |
|
||||||
|
|--------------|-----------------|--------------------------------------------------------------------------------------------------------------------------------------------------------|------------|
|
||||||
|
| path | str | An import path to the object. This should have the form `path.to.module:object`. For example, `quartodoc:get_object` or `quartodoc:MdRenderer.render`. | _required_ |
|
||||||
|
| object_name | \'str \| None\' | (Deprecated). A function name. | `None` |
|
||||||
|
| parser | str | A docstring parser to use. | `'numpy'` |
|
||||||
|
| load_aliases | | For aliases that were imported from other modules, should we load that module? | `True` |
|
||||||
|
| dynamic | | Whether to dynamically import object. Useful if docstring is not hard-coded, but was set on object by running python code. | `False` |
|
||||||
|
|
||||||
|
## See Also {.doc-section .doc-section-see-also}
|
||||||
|
|
||||||
|
preview: print a user-friendly preview of a griffe object.
|
||||||
|
|
||||||
|
## Examples {.doc-section .doc-section-examples}
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> get_function("quartodoc", "get_function")
|
||||||
|
<Function('get_function', ...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Returns {.doc-section .doc-section-returns}
|
||||||
|
|
||||||
|
| Name | Type | Description |
|
||||||
|
|--------|-----------|---------------|
|
||||||
|
| x | dc.Object | abc |
|
10
reference/index.qmd
Normal file
10
reference/index.qmd
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
# Function reference {.doc .doc-index}
|
||||||
|
|
||||||
|
## Some functions
|
||||||
|
|
||||||
|
Functions to inspect docstrings.
|
||||||
|
|
||||||
|
| | |
|
||||||
|
| --- | --- |
|
||||||
|
| [get_object](get_object.qmd#quartodoc.get_object) | Fetch a griffe object. |
|
||||||
|
| [preview](preview.qmd#quartodoc.preview) | Print a friendly representation of a griffe object (e.g. function, docstring) |
|
24
reference/preview.qmd
Normal file
24
reference/preview.qmd
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
# preview { #quartodoc.preview }
|
||||||
|
|
||||||
|
```python
|
||||||
|
preview(ast, max_depth=999, compact=False, as_string=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
Print a friendly representation of a griffe object (e.g. function, docstring)
|
||||||
|
|
||||||
|
## Examples {.doc-section .doc-section-examples}
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> from quartodoc import get_object
|
||||||
|
>>> obj = get_object("quartodoc", "get_object")
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> preview(obj.docstring.parsed)
|
||||||
|
...
|
||||||
|
```
|
||||||
|
|
||||||
|
```python
|
||||||
|
>>> preview(obj)
|
||||||
|
...
|
||||||
|
```
|
1
src/srgweb/__init__.py
Normal file
1
src/srgweb/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
__all__: list[str] = []
|
200
src/srgweb/artsurvey.py
Normal file
200
src/srgweb/artsurvey.py
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
import io

import requests
from bs4 import BeautifulSoup
import pandas as pd
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn
from astropy.coordinates import SkyCoord
import astropy.units as u
|
||||||
|
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
def artsurvey_session(
    username: str = "",
    password: str = "",
    base_url: str = "http://arxiv.srg.rssi.ru/"
) -> requests.Session | None:
    """
    Open ART-Survey session

    Parameters
    ----------
    username : str, optional
        Your login username for ARTSurvey. Default is "".
    password : str, optional
        Your login password. Default is "".
    base_url : str, optional
        Base URL of the ARTSurvey system. Default is "http://arxiv.srg.rssi.ru/".

    Returns
    -------
    requests.Session or None
        Authenticated requests.Session object if login successful, otherwise None.

    Example
    -------
    >>> sess = artsurvey_session("bob", keyring.get_password("PLAN_SRG", ""))
    """
    LOGIN_URL = f"{base_url.rstrip('/')}/login"
    # Identify the client to the server on every request made via this session.
    HEADERS = {
        "User-Agent": "srgweb, Python package (uskov@cosmos.ru)"
    }

    with console.status(
        f"Logging in as [cyan]{username}[/cyan] to [link={base_url.rstrip('/')}][blue underline]<{base_url.rstrip('/')}>"
    ):
        session = requests.Session()
        session.headers.update(HEADERS)

        # Step 1 — GET login page
        resp = session.get(LOGIN_URL)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        # The first <form> on the page is assumed to be the login form.
        form = soup.find("form")
        if not form:
            console.print("[red]Login form not found.[/red]")
            return None

        # Step 2 — Prepare form fields
        # Resolve the form action against the login URL if it is relative.
        action = form.get("action") or LOGIN_URL
        full_action = action if action.startswith("http") else requests.compat.urljoin(LOGIN_URL, action)

        payload = {}
        for input_ in form.find_all("input"):
            name = input_.get("name")
            if not name:
                continue
            if name == "username":
                payload[name] = username
            elif name == "password":
                payload[name] = password
            else:
                # Any other (e.g. hidden) inputs keep their server-provided value.
                payload[name] = input_.get("value", "")

        # Step 3 — POST login form
        resp = session.post(full_action, data=payload, headers={"Referer": LOGIN_URL})
        resp.raise_for_status()

        # NOTE(review): failure is detected from the page text, which suggests the
        # server presumably returns HTTP 200 even for bad credentials — confirm.
        soup = BeautifulSoup(resp.text, "html.parser")
        text = soup.get_text(strip=True)

        if "login: form is not valid" in text.lower():
            console.print("[bold red]Incorrect login or password.[/bold red]")
            return None

    console.print(
        f"[green]✔[/green] Logged in as [cyan]{username}[/cyan] to [link={base_url.rstrip('/')}][blue underline]<{base_url.rstrip('/')}>"
    )
    return session
|
||||||
|
|
||||||
|
def default_artsurvey_settings() -> dict[str, str]:
    """
    Return the default filter parameters for the ARTSurvey settings form.

    Returns
    -------
    dict[str, str]
        Filter parameters understood by the ARTSurvey form; fields left as ""
        impose no constraint.
    """
    # Fields that carry a non-empty default value.
    presets = {
        "sky": "allsky",
        "survey": "179",
        "band": "E0",
        "exclude_band": "E0",
    }
    # Full field list in form order; anything not in `presets` defaults to "".
    field_order = (
        "sky",
        "survey",
        "band",
        "exclude_survey",
        "exclude_band",
        "exclude_log_nfalse",
        "exclude_log_ml_nfalse",
        "sign_ml_min",
        "sign_ml_max",
        "log_nfalse_min",
        "log_nfalse_max",
        "log_ml_nfalse_min",
        "log_ml_nfalse_max",
        "detlike_min",
        "detlike_max",
        "exposure_min",
        "ext_min",
        "ext_max",
        "class_startswith",
        "cname_contains",
        "category",
        "exclude_category",
    )
    # Further optional filters exist but are intentionally not sent by default:
    # category_unclassified, gaia_primary, allwise_primary, turk_possible,
    # dec_min/dec_max, ecl_lat_min/ecl_lat_max, circle_ra/circle_dec/circle_rmax_deg.
    return {field: presets.get(field, "") for field in field_order}
|
||||||
|
|
||||||
|
def get_artsurvey_cat(
    session: requests.Session,
    survey_name: str = "S1-5v12345.12",
    base_url: str = "http://arxiv.srg.rssi.ru/",
    **kwargs,
) -> pd.DataFrame:
    """
    Download the ART-Survey catalog as a DataFrame.

    Parameters
    ----------
    session : requests.Session
        An authenticated requests.Session object (see `artsurvey_session`).
    survey_name : str, optional
        Survey version to select in the settings form. Default is "S1-5v12345.12".
    base_url : str, optional
        Base URL of the ARTSurvey system. Default is "http://arxiv.srg.rssi.ru/".
        (Bug fix: `base_url` was referenced but never defined in the original.)
    **kwargs
        Extra filter fields; these override values scraped from the settings
        form (see `default_artsurvey_settings` for known field names).

    Returns
    -------
    pd.DataFrame
        The catalog table. An empty DataFrame is returned if the settings form
        is missing or the filters match nothing. A 'skycoord' column is added
        when 'ra'/'dec' columns are present.
    """
    settings_url = f"{base_url.rstrip('/')}/artsurvey"

    # 1. Fetch the settings (filter) form.
    with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}")) as progress:
        progress.add_task(description="Обновление фильтров обзора", total=None)
        resp = session.get(settings_url)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

    form = soup.find("form")
    if not form:
        console.print("[red]❌ Не найдена форма фильтров на странице artsurvey.[/red]")
        return pd.DataFrame()

    # 2. Collect form parameters.
    payload = {}

    # 2.1 <input> fields: keep the server-provided defaults.
    for input_tag in form.find_all("input"):
        name = input_tag.get("name")
        if not name:
            continue
        payload[name] = input_tag.get("value", "")

    # 2.2 <select> fields: pick the option whose visible text matches survey_name.
    for select_tag in form.find_all("select"):
        name = select_tag.get("name")
        if not name:
            continue
        selected = None
        for option in select_tag.find_all("option"):
            if option.text.strip() == survey_name:
                selected = option.get("value")
                break
        if selected:
            payload[name] = selected

    # 2.3 User-supplied arguments take precedence over scraped values.
    payload.update(kwargs)

    # 3. Submit the filters (relative form actions resolved against settings_url).
    action = form.get("action") or settings_url
    full_action = action if action.startswith("http") else requests.compat.urljoin(settings_url, action)
    resp = session.post(full_action, data=payload, headers={"Referer": settings_url})
    resp.raise_for_status()

    # 4. Download the CSV catalog.
    with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}")) as progress:
        progress.add_task(description="Загрузка каталога ART-Survey (~10-20 секунд)", total=None)
        csv_url = f"{base_url.rstrip('/')}/artsurvey/csv/all"
        resp = session.get(csv_url)
        resp.raise_for_status()
        # Bug fix: pandas has no pd.compat.StringIO — use io.StringIO instead.
        df = pd.read_csv(io.StringIO(resp.text), na_values=["", "None"])

    # 5. Post-processing.
    if df.empty:
        console.print("[red]Каталог пуст. Возможно, фильтры слишком строгие.[/red]")
        return df

    if "ra" in df.columns and "dec" in df.columns:
        coords = SkyCoord(ra=df["ra"].values * u.deg, dec=df["dec"].values * u.deg, frame="icrs")
        df["skycoord"] = coords

    return df
|
362
src/srgweb/triton.py
Normal file
362
src/srgweb/triton.py
Normal file
@ -0,0 +1,362 @@
|
|||||||
|
import requests
|
||||||
|
import io
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from urllib.parse import urljoin
|
||||||
|
|
||||||
|
from rich.console import Console
|
||||||
|
import pandas as pd
|
||||||
|
from janitor import clean_names
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
|
||||||
|
def triton_session(username: str = "", password: str = "") -> requests.Session | None:
    """
    Open triton session

    Parameters
    ----------
    username : str, optional
        Your Triton login username. Default is an empty string.
    password : str, optional
        Your Triton login password. Default is an empty string.

    Returns
    -------
    requests.Session or None
        An authenticated requests.Session object if login is successful, otherwise None.

    Example
    -------
    >>> sess = triton_session("bob", keyring.get_password("PLAN_SRG", ""))
    """
    LOGIN_URL = "https://www.srg.cosmos.ru/logbook/login"
    # Identify the client to the server on every request made via this session.
    HEADERS = {
        "User-Agent": "srgweb, Python package (uskov@cosmos.ru)"
    }
    with console.status(f"Logging in as [cyan]{username}[/cyan] to [link=https://www.srg.cosmos.ru/triton][blue underline]<https://www.srg.cosmos.ru/triton>"):
        session = requests.Session()
        session.headers.update(HEADERS)

        # 1. Fetch the login page so the form (and any hidden fields) can be scraped.
        resp = session.get(LOGIN_URL)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        # 2. Extract the first form
        form = soup.find("form")
        if not form:
            console.log("[red]Login form not found.")
            return None

        # Resolve a relative form action against the login URL.
        action = form.get("action") or LOGIN_URL
        full_action = action if action.startswith("http") else requests.compat.urljoin(LOGIN_URL, action)

        # 3. Prepare payload from form inputs
        payload = {}
        for input_ in form.find_all("input"):
            name = input_.get("name")
            if not name:
                continue
            if name == "username":
                payload[name] = username
            elif name == "password":
                payload[name] = password
            else:
                # Any other (e.g. hidden) inputs keep their server-provided value.
                payload[name] = input_.get("value", "")

        # 4. Submit the login form; cookies persist on the session object.
        resp = session.post(full_action, data=payload, headers={"Referer": LOGIN_URL})
        resp.raise_for_status()

        # NOTE(review): failure is detected from the page text, which suggests the
        # server presumably returns HTTP 200 even for bad credentials — confirm.
        soup = BeautifulSoup(resp.text, "html.parser")
        text = soup.get_text(strip=True)
        if "login: form is not valid" in text.lower():
            console.print("[bold red]Incorrect login or password.")
            return None

    console.print(f"[green]✔[/green] Logged in as [cyan]{username}[/cyan] to [link=https://www.srg.cosmos.ru/triton][blue underline]<https://www.srg.cosmos.ru/triton>")
    return session
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def list_programs(session: requests.Session) -> dict[str, str]:
    """
    Get the list of the Programs available in Triton.

    Parameters
    ----------
    session : requests.Session
        An authenticated requests.Session object.

    Returns
    -------
    dict[str, str]
        A dictionary mapping program names to their URLs.

    Example
    -------
    >>> sess = triton_session("bob", keyring.get_password("PLAN_SRG", ""))
    >>> programs = list_programs(sess)
    >>> print(programs)
    """
    MAIN_PAGE = "https://www.srg.cosmos.ru/triton/en"
    BASE_URL = "https://www.srg.cosmos.ru"

    resp = session.get(MAIN_PAGE)
    resp.raise_for_status()

    # The program list is scraped from the first ".dropdown-menu" element on
    # the main page; presumably this is the site's navigation menu — verify.
    soup = BeautifulSoup(resp.text, "html.parser")
    dropdown = soup.select_one(".dropdown-menu")
    if dropdown is None:
        raise RuntimeError("Dropdown menu not found on Triton main page")

    links = dropdown.select("a")

    items: dict[str, str] = {}
    for a in links:
        name = a.get_text(strip=True)
        href = a.get("href") or ""
        # Relative hrefs are resolved against the site root.
        full_url = urljoin(BASE_URL, href)
        if name:
            items[name] = full_url

    # Synthetic "all" entry pointing at the combined view; overwrites any
    # scraped entry with the same name.
    items["all"] = "https://www.srg.cosmos.ru/triton/show/all"

    return items
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def list_baskets(session: requests.Session) -> dict[str, str]:
    """
    Get the list of baskets available in Triton.

    Parameters
    ----------
    session : requests.Session
        An authenticated requests.Session object.

    Returns
    -------
    dict[str, str]
        A dictionary mapping basket titles to their URLs (sorted by title).

    Raises
    ------
    RuntimeError
        If fewer than two ".table" elements are found on the main page.
    """
    MAIN_PAGE = "https://www.srg.cosmos.ru/triton/en"
    resp = session.get(MAIN_PAGE)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    tables = soup.select(".table")
    if len(tables) < 2:
        raise RuntimeError("Basket table not found on Triton main page")

    # Parse the second table as baskets
    df_basket = pd.read_html(io.StringIO(str(tables[1])))[0]
    df_basket = clean_names(df_basket)

    # Extract basket URLs from <a> tags in the second table
    basket_urls = []
    for a in tables[1].select("a"):
        href = a.get("href", "")
        if "show" in href:
            basket_urls.append(f"https://www.srg.cosmos.ru{href}")

    # Add URLs to DataFrame; rows and links are assumed to be in the same
    # order — TODO confirm against the rendered page.
    if len(basket_urls) == len(df_basket):
        df_basket["url"] = basket_urls
    else:
        # fallback: fill with empty strings if mismatch
        df_basket["url"] = basket_urls + [""] * (len(df_basket) - len(basket_urls))

    # Map title to url, sort by title (case-insensitive)
    vals = dict(sorted(zip(df_basket["title"], df_basket["url"]), key=lambda x: x[0].lower()))
    return vals
|
||||||
|
|
||||||
|
|
||||||
|
def _triton_read_table(session: requests.Session, page_download: str) -> pd.DataFrame:
    """
    Download a CSV table from *page_download* and return it as a DataFrame.

    Dtypes are pinned for known Triton columns; empty strings and the literal
    "None" are treated as missing values. Column names are normalized with
    janitor.clean_names (e.g. "RA" -> "ra").
    """
    resp = session.get(page_download)
    resp.raise_for_status()

    # Read from raw bytes so pandas handles the decoding of the CSV payload.
    csv_bytes: bytes = resp.content
    df = pd.read_csv(
        io.BytesIO(csv_bytes),
        dtype={
            "RA": "float64",
            "Dec": "float64",
            "objid": "int64",
            "time_spent": "string",
            "Redshift_str": "string",
            "Redshift_err": "string",
            "Status": "string",
        },
        na_values=["", "None"],
        keep_default_na=True,
    )

    df = clean_names(df)
    return df
|
||||||
|
|
||||||
|
|
||||||
|
def _triton_parse_src_urls(session: requests.Session, program_url: str) -> dict[str, str]:
    """
    Returns a dictionary mapping source names to their URLs for each row in the program table.
    Only <a> tags without a class and with 'show' in href are considered, matching the R logic.

    Parameters
    ----------
    session : requests.Session
        An authenticated requests.Session object.
    program_url : str
        The URL of the program page to parse.

    Returns
    -------
    dict[str, str]
        A dictionary where keys are source names and values are their corresponding URLs.
        If two rows share a source name, the later one wins.
    """
    resp = session.get(program_url)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    # Find all <a> inside <td> inside .col-sm-12, no class, href contains 'show'
    anchors = soup.select(".col-sm-12 td a:not([class])")
    name_url: dict[str, str] = {}
    for a in anchors:
        href = a.get("href", "")
        if "show" in href:
            name = a.get_text(strip=True)
            # Relative hrefs are resolved against the site root.
            full_url = requests.compat.urljoin("https://www.srg.cosmos.ru", href)
            name_url[name] = full_url
    return name_url
|
||||||
|
|
||||||
|
|
||||||
|
def get_program(session: requests.Session, program: str = "SRGA") -> pd.DataFrame:
    """
    Download and parse a program table from the Triton system.

    Parameters
    ----------
    session : requests.Session
        An authenticated requests.Session object.
    program : str, optional
        The name of the program to download (case-insensitive). Default is "SRGA".

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the program table with cleaned column names.
        Columns include at least 'name', 'type', 'url', and, if astropy is installed, 'skycoord'.

    Raises
    ------
    ValueError
        If the specified program is not found among available programs.

    Example
    -------
    >>> sess = triton_session("bob", keyring.get_password("MYTOKEN", ""))
    >>> df = get_program(sess, "SRGA")
    >>> print(df.head())
    """

    # Discover what programs exist; matching is done case-insensitively.
    with console.status("Check available programs", spinner="dots"):
        programs = list_programs(session)
        programs_lc = {k.lower(): v for k, v in programs.items()}
    console.print("[green]✔[/green] Check available programs")

    prog_key = program.lower()
    if prog_key not in programs_lc:
        raise ValueError(f"Program {program} not found. Choose one of: {list(programs.keys())}")

    # Per-source URLs scraped from the program page, keyed by source name.
    with console.status(f"Parse URLs from the [green]{program}[/green] program", spinner="dots"):
        src_vec = _triton_parse_src_urls(session, programs_lc[prog_key])
    console.print(f"[green]✔[/green] Parse URLs from the [green]{program}[/green] program")

    # Build a CSV download URL for every program: program pages expose it by
    # replacing "show" with "download"; everything else falls back to the
    # generic download endpoint.
    download_pages: dict[str, str] = {}
    for name, url in programs.items():
        if "program" in url:
            download_pages[name.lower()] = url.replace("show", "download")
        else:
            download_pages[name.lower()] = "https://www.srg.cosmos.ru/triton/download"

    with console.status(f"Downloading table from [green]{program}[/green]", spinner="dots"):
        df = _triton_read_table(session, download_pages[prog_key])
    console.print(f"[green]✔[/green] Downloading table from [green]{program}[/green]")

    # Source type is embedded in the free-form observer notes as "type: ...".
    df["type"] = df["observernotes"].str.extract(r"type:\s?(.*)\n?", expand=False)
    # Sources absent from src_vec get a missing value in 'url'.
    df["url"] = df["name"].map(src_vec).astype("string")

    # astropy is optional at runtime: add a SkyCoord column only when available.
    try:
        from astropy.coordinates import SkyCoord
        df["skycoord"] = SkyCoord(df['ra'], df['dec'], frame='icrs', unit='deg')
    except ImportError:
        pass

    console.print("[green]✔[/green] Finished")
    return df
|
||||||
|
|
||||||
|
|
||||||
|
def get_basket(session: requests.Session, basket: str = "") -> pd.DataFrame:
    """
    Download and parse a basket table from the Triton system.

    Parameters
    ----------
    session : requests.Session
        An authenticated requests.Session object.
    basket : str, optional
        The name of the basket to download (case-insensitive).

    Returns
    -------
    pd.DataFrame
        A DataFrame containing the basket table with cleaned column names and a 'skycoord' column if astropy is installed.

    Raises
    ------
    ValueError
        If the specified basket is not found among available baskets.
    """
    # Discover what baskets exist; matching is done case-insensitively.
    with console.status("Check available baskets", spinner="dots"):
        baskets = list_baskets(session)
        baskets_lc = {k.lower(): v for k, v in baskets.items()}
    console.print("[green]✔[/green] Check available baskets")

    basket_key = basket.lower()
    if basket_key not in baskets_lc:
        raise ValueError(f"Basket {basket} not found. Choose one of: {list(baskets.keys())}")

    # The first ".table" element on the basket page holds the source list.
    with console.status(f"Parse basket table from [green]{basket}[/green]", spinner="dots"):
        url = baskets_lc[basket_key]
        resp = session.get(url)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")
        tables = soup.select(".table")
        if not tables:
            raise RuntimeError("No tables found on basket page")
        df_basket = pd.read_html(io.StringIO(str(tables[0])))[0]
        df_basket = clean_names(df_basket)
    console.print(f"[green]✔[/green] Parse basket table from [green]{basket}[/green]")

    # astropy is optional at runtime: build coordinates from the sexagesimal
    # 'rahms'/'decdms' columns only when it is available.
    try:
        from astropy.coordinates import SkyCoord
        df_basket['skycoord'] = df_basket.apply(
            lambda r: SkyCoord(f"{r['rahms']} {r['decdms']}", unit=("hourangle", "deg"), frame="icrs"),
            axis=1
        )
    except ImportError:
        pass

    console.print("[green]✔[/green] Finished")
    return df_basket
|
||||||
|
|
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
Loading…
x
Reference in New Issue
Block a user