mirror of
https://github.com/ARUP-CAS/aiscr-qgis-amcr-viewer.git
synced 2026-06-19 04:12:55 +02:00
Feature/aktualizace heslaru (#32)
* přechod od statického hesláře k dynamickému načítání z OAI-PMH API AMČR * aplikace načítání heslářů a task management (backend) * frontend + debugging * aktualizace přibaleného hesláře * kosmetické drobnosti * ošetření speciální případů při stahování hesláře (katastr, okres) + s tím spojená aktualizace přiloženého hesláře
This commit is contained in:
+173
-84
@@ -2,10 +2,37 @@
|
||||
import os
|
||||
import csv
|
||||
import requests
|
||||
import xml.etree.ElementTree as ET
|
||||
import time
|
||||
from qgis.core import QgsMessageLog, Qgis
|
||||
|
||||
# Define paths for the plugin and its codelists directory
|
||||
PLUGIN_DIR = os.path.dirname(__file__)
|
||||
CODELISTS_DIR = os.path.join(PLUGIN_DIR, 'codelists')
|
||||
BASE_URL = "https://api.aiscr.cz/2.2/oai"
|
||||
OUTPUT_FILE = os.path.join(CODELISTS_DIR, 'heslar.csv')
|
||||
|
||||
slovnicek = {
|
||||
'obdobi' : 'heslo:obdobi',
|
||||
'typ_akce' : 'heslo:akce_typ',
|
||||
'areal' : 'heslo:areal',
|
||||
'kraj' : 'ruian_kraj',
|
||||
'organizace' : 'organizace',
|
||||
'okres' : 'ruian_okres',
|
||||
'katastr' : 'ruian_katastr',
|
||||
'vedouci' : 'osoba',
|
||||
'pian_presnost' : 'heslo:pian_presnost',
|
||||
'typ_lokality' : 'heslo:lokalita_typ',
|
||||
'druh_lokality' : 'heslo:lokalita_druh',
|
||||
'jistota' : 'heslo:jistota_urceni',
|
||||
'lokalita_zachovalost' : 'heslo:stav_dochovani'
|
||||
}
|
||||
|
||||
NS = {
|
||||
'oai': 'http://www.openarchives.org/OAI/2.0/',
|
||||
'dc': 'http://purl.org/dc/elements/1.1/',
|
||||
'oai_dc': 'http://www.openarchives.org/OAI/2.0/oai_dc/'
|
||||
}
|
||||
|
||||
def ensure_codelists_dir():
|
||||
"""Creates the codelists directory if it does not exist."""
|
||||
@@ -46,101 +73,163 @@ def parse_codelist_file(filename, target_dict=None):
|
||||
# Assign the extracted code to the corresponding label within the category
|
||||
target_dict[cat][label] = clean
|
||||
except Exception as e:
|
||||
print(f"AMČR Codelist Read Error for {filename}: {e}")
|
||||
QgsMessageLog.logMessage(f"AMČR Codelist Read Error for {filename}: {e}", "AMČR", Qgis.Critical)
|
||||
|
||||
return target_dict
|
||||
|
||||
def load_all_data():
|
||||
"""Loads all static and dynamic codelists during plugin startup."""
|
||||
ensure_codelists_dir()
|
||||
|
||||
# Initialize the base structure with empty dictionaries for all expected categories
|
||||
categorized_data = {
|
||||
'obdobi': {}, 'typ_akce': {}, 'areal': {},
|
||||
'kraj': {}, 'organizace': {}, 'okres': {}, 'katastr': {},
|
||||
'vedouci': {}, 'pian_presnost': {}, 'typ_lokality': {}, 'druh_lokality': {},
|
||||
'jistota': {}, 'lokalita_zachovalost': {}
|
||||
}
|
||||
|
||||
# Parse the default static codelist and the dynamically generated leaders codelist
|
||||
categorized_data = {k: {} for k in slovnicek.keys()}
|
||||
parse_codelist_file('heslar.csv', categorized_data)
|
||||
parse_codelist_file('vedouci.csv', categorized_data)
|
||||
|
||||
return categorized_data
|
||||
|
||||
def download_vedouci():
|
||||
"""Fetches the list of leaders from the AMČR API and saves it to a CSV file."""
|
||||
def fetch_set(internal_name, api_set, task=None):
|
||||
dataset = []
|
||||
params = {
|
||||
"verb": "ListRecords",
|
||||
"metadataPrefix": "oai_dc",
|
||||
"set": api_set
|
||||
}
|
||||
|
||||
while True:
|
||||
# Kontrola zrušení v každém kroku
|
||||
if task and task.isCanceled():
|
||||
return None
|
||||
|
||||
try:
|
||||
response = requests.get(BASE_URL, params=params, timeout=30)
|
||||
response.raise_for_status()
|
||||
root = ET.fromstring(response.content)
|
||||
|
||||
records = root.findall('.//oai:record', NS)
|
||||
for rec in records:
|
||||
metadata = rec.find('.//oai_dc:dc', NS)
|
||||
if metadata is not None:
|
||||
# Kód (identifier)
|
||||
kod = metadata.find('dc:identifier', NS).text if metadata.find('dc:identifier', NS) is not None else ""
|
||||
|
||||
# Název (title) - filtrujeme systémové popisky "AMČR - ..."
|
||||
titles = metadata.findall('dc:title', NS)
|
||||
nazev = ""
|
||||
for t in titles:
|
||||
if t.text and not t.text.startswith("AMČR -") and not t.text.startswith(" AMČR -"):
|
||||
nazev = t.text
|
||||
break
|
||||
# Pokud by náhodou žádný title neprošel filtrem, vezmeme první dostupný
|
||||
if not nazev and titles:
|
||||
nazev = titles[0].text
|
||||
|
||||
specialni_pripady = ['okres', 'katastr']
|
||||
|
||||
if internal_name in specialni_pripady:
|
||||
kod = nazev
|
||||
|
||||
dataset.append({
|
||||
'Název': nazev,
|
||||
'Kód': kod,
|
||||
'Kategorie': internal_name
|
||||
})
|
||||
|
||||
# Stránkování
|
||||
token = root.find('.//oai:resumptionToken', NS)
|
||||
if token is not None and token.text:
|
||||
params = {
|
||||
"verb": "ListRecords",
|
||||
"resumptionToken": token.text
|
||||
}
|
||||
time.sleep(0.5)
|
||||
else:
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
QgsMessageLog.logMessage(f"Chyba u setu {api_set}: {e}", "AMČR", Qgis.Warning)
|
||||
break
|
||||
|
||||
return dataset
|
||||
|
||||
def download_heslare(task=None):
|
||||
"""Fetches the codelists from the AMČR API and saves it to a CSV file."""
|
||||
ensure_codelists_dir()
|
||||
all_data = []
|
||||
total_sets = len(slovnicek)
|
||||
|
||||
# API endpoint for fetching facet data for leaders
|
||||
url = "https://digiarchiv.aiscr.cz/api/search/query?entity=akce&sort=datestamp%20desc&page=0&onlyFacets=True&rows=0"
|
||||
for index, (interni, api_nazev) in enumerate(slovnicek.items()):
|
||||
# Pokud uživatel task zrušil v liště QGISu
|
||||
if task and task.isCanceled():
|
||||
return False
|
||||
|
||||
QgsMessageLog.logMessage(f"Zpracovávám kategorii: {interni}...", "AMČR", Qgis.Info)
|
||||
|
||||
# Nyní předáváme task správně do upravené funkce
|
||||
data = fetch_set(interni, api_nazev, task=task)
|
||||
|
||||
if data is None:
|
||||
return False # Bylo zrušeno uprostřed stahování
|
||||
|
||||
all_data.extend(data)
|
||||
|
||||
# Reportování postupu (0-100)
|
||||
if task:
|
||||
progress = (index + 1) / total_sets * 100
|
||||
task.setProgress(progress)
|
||||
|
||||
# Uložení do CSV
|
||||
with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8-sig') as f:
|
||||
fieldnames = ['Název', 'Kód', 'Kategorie']
|
||||
writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter=';')
|
||||
writer.writeheader()
|
||||
writer.writerows(all_data)
|
||||
|
||||
return True
|
||||
|
||||
def refresh_globals():
|
||||
"""Znovu načte data ze souborů do globálních proměnných."""
|
||||
global OBDOBI, TYP_AKCE, AREAL, KRAJE, ORGANIZACE, OKRESY, KATASTRY
|
||||
global VEDOUCI, PIAN_PRESNOST, TYP_LOKALITY, DRUH_LOKALITY, JISTOTA, LOKALITA_ZACHOVALOST
|
||||
|
||||
try:
|
||||
# Execute the GET request with a 20-second timeout
|
||||
r = requests.get(url, timeout=20)
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
|
||||
# Extract the leaders list from the JSON response using safe dict getters
|
||||
vedouci_list = data.get('facet_counts', {}).get('f_vedouci', [])
|
||||
if not vedouci_list:
|
||||
vedouci_list = data.get('facet_counts', {}).get('facet_fields', {}).get('f_vedouci', [])
|
||||
|
||||
csv_path = os.path.join(CODELISTS_DIR, 'vedouci.csv')
|
||||
|
||||
count = 0
|
||||
|
||||
# Open the target CSV file for writing without extra blank lines
|
||||
with open(csv_path, 'w', encoding='utf-8', newline='') as f:
|
||||
writer = csv.writer(f, delimiter=';')
|
||||
|
||||
# Write the standard header required by the parser function
|
||||
writer.writerow(['Název', 'Kód', 'Kategorie'])
|
||||
|
||||
# Iterate through the API results and format them for the CSV
|
||||
for item in vedouci_list:
|
||||
name = None
|
||||
if isinstance(item, dict):
|
||||
name = item.get('name')
|
||||
elif isinstance(item, str):
|
||||
name = item
|
||||
|
||||
# Ignore pure numbers (which are usually counts) and write valid names
|
||||
if name and not str(name).isdigit():
|
||||
writer.writerow([name, name, 'vedouci'])
|
||||
count += 1
|
||||
|
||||
return True, f"Staženo {count} jmen."
|
||||
|
||||
except Exception as e:
|
||||
return False, str(e)
|
||||
|
||||
# Initialize global codelist data when the module is imported
|
||||
_DATA = load_all_data()
|
||||
|
||||
# Safely extract individual categories into global variables for easy access across the plugin
|
||||
OBDOBI = _DATA.get('obdobi', {})
|
||||
TYP_AKCE = _DATA.get('typ_akce', {})
|
||||
AREAL = _DATA.get('areal', {})
|
||||
KRAJE = _DATA.get('kraj', {})
|
||||
ORGANIZACE = _DATA.get('organizace', {})
|
||||
OKRESY = _DATA.get('okres', {})
|
||||
KATASTRY = _DATA.get('katastr', {})
|
||||
VEDOUCI = _DATA.get('vedouci', {})
|
||||
PIAN_PRESNOST = _DATA.get('pian_presnost', {})
|
||||
TYP_LOKALITY = _DATA.get('typ_lokality', {})
|
||||
DRUH_LOKALITY = _DATA.get('druh_lokality', {})
|
||||
JISTOTA = _DATA.get('jistota', {})
|
||||
LOKALITA_ZACHOVALOST = _DATA.get('lokalita_zachovalost', {})
|
||||
|
||||
def refresh_vedouci_cache():
|
||||
"""Reloads only the 'vedouci.csv' file to quickly update the cache without full initialization."""
|
||||
# Parse only the targeted file containing the updated leaders
|
||||
temp_data = parse_codelist_file('vedouci.csv')
|
||||
new_vedouci = temp_data.get('vedouci', {})
|
||||
data = load_all_data()
|
||||
|
||||
# Clear the existing global dictionary and update it with the fresh data
|
||||
OBDOBI.clear()
|
||||
OBDOBI.update(data.get('obdobi', {}))
|
||||
TYP_AKCE.clear()
|
||||
TYP_AKCE.update(data.get('typ_akce', {}))
|
||||
AREAL.clear()
|
||||
AREAL.update(data.get('areal', {}))
|
||||
KRAJE.clear()
|
||||
KRAJE.update(data.get('kraj', {}))
|
||||
ORGANIZACE.clear()
|
||||
ORGANIZACE.update(data.get('organizace', {}))
|
||||
OKRESY.clear()
|
||||
OKRESY.update(data.get('okres', {}))
|
||||
KATASTRY.clear()
|
||||
KATASTRY.update(data.get('katastr', {}))
|
||||
VEDOUCI.clear()
|
||||
VEDOUCI.update(new_vedouci)
|
||||
|
||||
return len(VEDOUCI)
|
||||
VEDOUCI.update(data.get('vedouci', {}))
|
||||
PIAN_PRESNOST.clear()
|
||||
PIAN_PRESNOST.update(data.get('pian_presnost', {}))
|
||||
TYP_LOKALITY.clear()
|
||||
TYP_LOKALITY.update(data.get('typ_lokality', {}))
|
||||
DRUH_LOKALITY.clear()
|
||||
DRUH_LOKALITY.update(data.get('druh_lokality', {}))
|
||||
JISTOTA.clear()
|
||||
JISTOTA.update(data.get('jistota', {}))
|
||||
LOKALITA_ZACHOVALOST.clear()
|
||||
LOKALITA_ZACHOVALOST.update(data.get('lokalita_zachovalost', {}))
|
||||
|
||||
# Inicializace prázdných diktů, které se naplní hned pod tím
|
||||
OBDOBI = {}
|
||||
TYP_AKCE = {}
|
||||
AREAL = {}
|
||||
KRAJE = {}
|
||||
ORGANIZACE = {}
|
||||
OKRESY = {}
|
||||
KATASTRY = {}
|
||||
VEDOUCI = {}
|
||||
PIAN_PRESNOST = {}
|
||||
TYP_LOKALITY = {}
|
||||
DRUH_LOKALITY = {}
|
||||
JISTOTA = {}
|
||||
LOKALITA_ZACHOVALOST = {}
|
||||
|
||||
refresh_globals()
|
||||
Reference in New Issue
Block a user